monitoring microservices; new container for overall monitoring of a deployment; helm update; helm local testing

This commit is contained in:
cristi8 2022-05-05 02:01:00 +03:00
Родитель 753b38abb4
Коммит 0a0fe3cb77
28 изменённых файлов: 3236 добавлений и 999 удалений

Просмотреть файл

@ -72,6 +72,7 @@ workflows:
- docker-build-and-publish-webhooks
- docker-build-and-publish-file-imports
- docker-build-and-publish-previews
- docker-build-and-publish-monitor-container
- docker-build-and-publish-test-container
- publish-npm:
@ -194,6 +195,12 @@ jobs:
FOLDER: utils
SPECKLE_SERVER_PACKAGE: test-deployment
docker-build-and-publish-monitor-container:
<<: *docker-job
environment:
FOLDER: utils
SPECKLE_SERVER_PACKAGE: monitor-deployment
publish-npm:
docker: *docker-image
working_directory: *work-dir

Просмотреть файл

@ -27,8 +27,8 @@ fi
rm -rf ~/helm/charts/speckle-server
cp -r utils/helm/speckle-server ~/helm/charts/speckle-server
echo 'version: '$RELEASE_VERSION >> ~/helm/charts/speckle-server/Chart.yaml
echo 'appVersion: "'$RELEASE_VERSION'"' >> ~/helm/charts/speckle-server/Chart.yaml
sed -i 's/version: [^\s]*/version: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/Chart.yaml
sed -i 's/appVersion: [^\s]*/appVersion: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/Chart.yaml
sed -i 's/docker_image_tag: [^\s]*/docker_image_tag: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/values.yaml

16
packages/fileimport-service/.vscode/launch.json поставляемый Normal file
Просмотреть файл

@ -0,0 +1,16 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch via NPM",
"request": "launch",
"runtimeArgs": ["run-script", "dev"],
"runtimeExecutable": "npm",
"skipFiles": ["<node_internals>/**"],
"type": "node"
}
]
}

46
packages/fileimport-service/package-lock.json сгенерированный
Просмотреть файл

@ -15,6 +15,7 @@
"knex": "^1.0.3",
"node-fetch": "^2.6.5",
"pg": "^8.7.1",
"prom-client": "^14.0.1",
"valid-filename": "^3.1.0",
"web-ifc": "^0.0.33"
},
@ -295,6 +296,11 @@
"node": ">=8"
}
},
"node_modules/bintrees": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz",
"integrity": "sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ="
},
"node_modules/boxen": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/boxen/-/boxen-5.1.2.tgz",
@ -2126,6 +2132,17 @@
"url": "https://github.com/prettier/prettier?sponsor=1"
}
},
"node_modules/prom-client": {
"version": "14.0.1",
"resolved": "https://registry.npmjs.org/prom-client/-/prom-client-14.0.1.tgz",
"integrity": "sha512-HxTArb6fkOntQHoRGvv4qd/BkorjliiuO2uSWC2KC17MUTKYttWdDoXX/vxOhQdkoECEM9BBH0pj2l8G8kev6w==",
"dependencies": {
"tdigest": "^0.1.1"
},
"engines": {
"node": ">=10"
}
},
"node_modules/pstree.remy": {
"version": "1.1.8",
"resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
@ -2511,6 +2528,14 @@
"node": ">=8.0.0"
}
},
"node_modules/tdigest": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.1.tgz",
"integrity": "sha1-Ljyyw56kSeVdHmzZEReszKRYgCE=",
"dependencies": {
"bintrees": "1.0.1"
}
},
"node_modules/text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@ -3063,6 +3088,11 @@
"integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
"dev": true
},
"bintrees": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz",
"integrity": "sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ="
},
"boxen": {
"version": "5.1.2",
"resolved": "https://registry.npmjs.org/boxen/-/boxen-5.1.2.tgz",
@ -4400,6 +4430,14 @@
"integrity": "sha512-m2FgJibYrBGGgQXNzfd0PuDGShJgRavjUoRCw1mZERIWVSXF0iLzLm+aOqTAbLnC3n6JzUhAA8uZnFVghHJ86A==",
"dev": true
},
"prom-client": {
"version": "14.0.1",
"resolved": "https://registry.npmjs.org/prom-client/-/prom-client-14.0.1.tgz",
"integrity": "sha512-HxTArb6fkOntQHoRGvv4qd/BkorjliiuO2uSWC2KC17MUTKYttWdDoXX/vxOhQdkoECEM9BBH0pj2l8G8kev6w==",
"requires": {
"tdigest": "^0.1.1"
}
},
"pstree.remy": {
"version": "1.1.8",
"resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
@ -4675,6 +4713,14 @@
"resolved": "https://registry.npmjs.org/tarn/-/tarn-3.0.2.tgz",
"integrity": "sha512-51LAVKUSZSVfI05vjPESNc5vwqqZpbXCsU+/+wxlOrUjk2SnFTt97v9ZgQrD4YmxYW1Px6w2KjaDitCfkvgxMQ=="
},
"tdigest": {
"version": "0.1.1",
"resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.1.tgz",
"integrity": "sha1-Ljyyw56kSeVdHmzZEReszKRYgCE=",
"requires": {
"bintrees": "1.0.1"
}
},
"text-table": {
"version": "0.2.0",
"resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",

Просмотреть файл

@ -28,6 +28,7 @@
"knex": "^1.0.3",
"node-fetch": "^2.6.5",
"pg": "^8.7.1",
"prom-client": "^14.0.1",
"valid-filename": "^3.1.0",
"web-ifc": "^0.0.33"
},

Просмотреть файл

@ -1,6 +1,12 @@
/* eslint-disable no-console */
'use strict'
const {
initPrometheusMetrics,
metricDuration,
metricInputFileSize,
metricOperationErrors
} = require('./prometheusMetrics')
const knex = require('../knex')
const { getFileStream } = require('./filesApi')
@ -39,13 +45,16 @@ async function startTask() {
async function doTask(task) {
let tempUserToken = null
let serverApi = null
let fileTypeForMetric = 'unknown'
let fileSizeForMetric = 0
const metricDurationEnd = metricDuration.startTimer()
try {
console.log('Doing task ', task)
const { rows } = await knex.raw(
`
SELECT
id as "fileId", "streamId", "branchName", "userId", "fileName", "fileType"
id as "fileId", "streamId", "branchName", "userId", "fileName", "fileType", "fileSize"
FROM file_uploads
WHERE id = ?
LIMIT 1
@ -56,6 +65,8 @@ async function doTask(task) {
if (!info) {
throw new Error('Internal error: DB inconsistent')
}
fileTypeForMetric = info.fileType || 'missing_info'
fileSizeForMetric = Number(info.fileSize) || 0
fs.mkdirSync(TMP_INPUT_DIR, { recursive: true })
@ -165,7 +176,10 @@ async function doTask(task) {
`,
[err.toString(), task.id]
)
metricOperationErrors.labels(fileTypeForMetric).inc()
}
metricDurationEnd({ op: fileTypeForMetric })
metricInputFileSize.labels(fileTypeForMetric).observe(fileSizeForMetric)
fs.rmSync(TMP_INPUT_DIR, { force: true, recursive: true })
if (fs.existsSync(TMP_RESULTS_PATH)) fs.unlinkSync(TMP_RESULTS_PATH)
@ -234,6 +248,7 @@ async function tick() {
// Check for another task very soon
setTimeout(tick, 10)
} catch (err) {
metricOperationErrors.labels('main_loop').inc()
console.log('Error executing task: ', err)
setTimeout(tick, 5000)
}
@ -241,6 +256,7 @@ async function tick() {
async function main() {
console.log('Starting FileUploads Service...')
initPrometheusMetrics()
process.on('SIGTERM', () => {
shouldExit = true

Просмотреть файл

@ -0,0 +1,127 @@
/* eslint-disable no-unused-vars */
'use strict'
const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('../knex')
// Metric handles for the knex connection pool and query statistics.
// They stay null until initKnexPrometheusMetrics() creates them.
let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null
// Start timestamp (Date.now(), in ms) of each in-flight query, keyed by its knex query uid.
const queryStartTime = {}
// Empty the default registry, attach project/app labels to every metric,
// then register prom-client's default metrics.
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
project: 'speckle-server',
app: 'fileimport-service'
})
prometheusClient.collectDefaultMetrics()
// Set to true by initPrometheusMetrics() so its setup runs only once.
let prometheusInitialized = false
/**
 * Creates the knex metrics (connection pool gauges, query duration summary and
 * query error counter) and subscribes to the knex query events that feed them.
 */
function initKnexPrometheusMetrics() {
  // Builds a gauge whose value is read from the knex pool each time it is collected.
  const poolGauge = (name, help, readPool) =>
    new prometheusClient.Gauge({
      name,
      help,
      collect() {
        this.set(readPool(knex.client.pool))
      }
    })

  metricFree = poolGauge(
    'speckle_server_knex_free',
    'Number of free DB connections',
    (pool) => pool.numFree()
  )
  metricUsed = poolGauge(
    'speckle_server_knex_used',
    'Number of used DB connections',
    (pool) => pool.numUsed()
  )
  metricPendingAquires = poolGauge(
    'speckle_server_knex_pending',
    'Number of pending DB connection aquires',
    (pool) => pool.numPendingAcquires()
  )

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })
  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  // Forgets the recorded start of a query and, if a start was recorded,
  // adds the elapsed seconds to the duration summary.
  const recordDuration = (uid) => {
    const key = String(uid)
    const elapsedSec = (Date.now() - queryStartTime[key]) / 1000
    delete queryStartTime[key]
    // An unknown uid yields NaN, which must not be observed
    if (!Number.isNaN(elapsedSec)) metricQueryDuration.observe(elapsedSec)
  }

  knex.on('query', (query) => {
    queryStartTime[String(query.__knexQueryUid)] = Date.now()
  })

  knex.on('query-response', (_response, query) => {
    recordDuration(query.__knexQueryUid)
  })

  knex.on('query-error', (_err, query) => {
    recordDuration(query.__knexQueryUid)
    metricQueryErrors.inc()
  })
}
module.exports = {
initPrometheusMetrics() {
if (prometheusInitialized) return
prometheusInitialized = true
initKnexPrometheusMetrics()
// Define the HTTP server
const server = http.createServer(async (req, res) => {
if (req.url === '/metrics') {
res.setHeader('Content-Type', prometheusClient.register.contentType)
res.end(await prometheusClient.register.metrics())
} else {
res.end('Speckle FileImport Service - prometheus metrics')
}
})
server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9093)
},
metricDuration: new prometheusClient.Histogram({
name: 'speckle_server_operation_duration',
help: 'Summary of the operation durations in seconds',
buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
labelNames: ['op']
}),
metricOperationErrors: new prometheusClient.Counter({
name: 'speckle_server_operation_errors',
help: 'Number of operations with errors',
labelNames: ['op']
}),
metricInputFileSize: new prometheusClient.Histogram({
name: 'speckle_server_operation_file_size',
help: 'Size of the operation input file size',
buckets: [
1000,
100 * 1000,
500 * 1000,
1000 * 1000,
5 * 1000 * 1000,
10 * 1000 * 1000,
100 * 1000 * 1000
],
labelNames: ['op']
})
}

Просмотреть файл

@ -10,10 +10,6 @@ const indexRouter = require('./routes/index')
const previewRouter = require('./routes/preview')
const objectsRouter = require('./routes/objects')
const apiRouter = require('./routes/api')
const prometheusClient = require('prom-client')
prometheusClient.register.clear()
prometheusClient.collectDefaultMetrics()
const app = express()
@ -29,16 +25,6 @@ app.use('/preview', previewRouter)
app.use('/objects', objectsRouter)
app.use('/api', apiRouter)
// Expose prometheus metrics
app.get('/metrics', async (req, res) => {
try {
res.set('Content-Type', prometheusClient.register.contentType)
res.end(await prometheusClient.register.metrics())
} catch (ex) {
res.status(500).end(ex.message)
}
})
// catch 404 and forward to error handler
app.use(function (req, res, next) {
next(createError(404))

Просмотреть файл

@ -4,6 +4,7 @@ const crypto = require('crypto')
const knex = require('../knex')
const fetch = require('node-fetch')
const fs = require('fs')
const metrics = require('./prometheusMetrics')
let shouldExit = false
@ -78,6 +79,7 @@ async function doTask(task) {
`,
[{}, task.streamId, task.objectId]
)
metrics.metricOperationErrors.labels('preview').inc()
}
}
@ -96,11 +98,16 @@ async function tick() {
return
}
const metricDurationEnd = metrics.metricDuration.startTimer()
await doTask(task)
metricDurationEnd({ op: 'preview' })
// Check for another task very soon
setTimeout(tick, 10)
} catch (err) {
metrics.metricOperationErrors.labels('main_loop').inc()
console.log('Error executing task: ', err)
setTimeout(tick, 5000)
}
@ -119,6 +126,8 @@ async function startPreviewService() {
console.log('Shutting down...')
})
metrics.initPrometheusMetrics()
tick()
}

Просмотреть файл

@ -0,0 +1,112 @@
/* eslint-disable no-unused-vars */
'use strict'
const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('../knex')
// Metric handles for the knex connection pool and query statistics.
// They stay null until initKnexPrometheusMetrics() creates them.
let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null
// Start timestamp (Date.now(), in ms) of each in-flight query, keyed by its knex query uid.
const queryStartTime = {}
// Empty the default registry, attach project/app labels to every metric,
// then register prom-client's default metrics.
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
project: 'speckle-server',
app: 'preview-service'
})
prometheusClient.collectDefaultMetrics()
// Set to true by initPrometheusMetrics() so its setup runs only once.
let prometheusInitialized = false
/**
 * Sets up the knex metrics: three gauges sampled from the connection pool,
 * a summary of query durations and a counter of failed queries. The summary
 * and counter are driven by knex's query, query-response and query-error events.
 */
function initKnexPrometheusMetrics() {
  // One entry per pool gauge: [metric name, help text, pool reader]
  const gaugeSpecs = {
    free: [
      'speckle_server_knex_free',
      'Number of free DB connections',
      (pool) => pool.numFree()
    ],
    used: [
      'speckle_server_knex_used',
      'Number of used DB connections',
      (pool) => pool.numUsed()
    ],
    pending: [
      'speckle_server_knex_pending',
      'Number of pending DB connection aquires',
      (pool) => pool.numPendingAcquires()
    ]
  }

  // The pool is looked up at collection time, not captured here
  const makeGauge = ([name, help, readPool]) =>
    new prometheusClient.Gauge({
      name,
      help,
      collect() {
        this.set(readPool(knex.client.pool))
      }
    })

  metricFree = makeGauge(gaugeSpecs.free)
  metricUsed = makeGauge(gaugeSpecs.used)
  metricPendingAquires = makeGauge(gaugeSpecs.pending)

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })
  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  // Shared by the response and error handlers: drop the stored start time
  // and observe the elapsed seconds unless no start time was stored (NaN).
  function finishQuery(querySpec) {
    const uid = String(querySpec.__knexQueryUid)
    const seconds = (Date.now() - queryStartTime[uid]) / 1000
    delete queryStartTime[uid]
    if (Number.isNaN(seconds)) return
    metricQueryDuration.observe(seconds)
  }

  knex.on('query', (querySpec) => {
    const uid = String(querySpec.__knexQueryUid)
    queryStartTime[uid] = Date.now()
  })
  knex.on('query-response', (_rows, querySpec) => {
    finishQuery(querySpec)
  })
  knex.on('query-error', (_error, querySpec) => {
    finishQuery(querySpec)
    metricQueryErrors.inc()
  })
}
module.exports = {
initPrometheusMetrics() {
if (prometheusInitialized) return
prometheusInitialized = true
initKnexPrometheusMetrics()
// Define the HTTP server
const server = http.createServer(async (req, res) => {
if (req.url === '/metrics') {
res.setHeader('Content-Type', prometheusClient.register.contentType)
res.end(await prometheusClient.register.metrics())
} else {
res.end('Speckle Preview Service - prometheus metrics')
}
})
server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9094)
},
metricDuration: new prometheusClient.Histogram({
name: 'speckle_server_operation_duration',
help: 'Summary of the operation durations in seconds',
buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
labelNames: ['op']
}),
metricOperationErrors: new prometheusClient.Counter({
name: 'speckle_server_operation_errors',
help: 'Number of operations with errors',
labelNames: ['op']
})
}

Просмотреть файл

@ -16,8 +16,10 @@ module.exports = {
labelNames: ['route']
})
}
return responseTime(function (req, res, time) {
let route = 'unknown'
if (req.originalUrl === '/graphql') route = '/graphql'
if (req.route && req.route.path) route = req.route.path
metricRequestDuration.labels(route).observe(time / 1000)
})

Просмотреть файл

@ -15,7 +15,12 @@ module.exports = function (app) {
if (!prometheusInitialized) {
prometheusInitialized = true
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
project: 'speckle-server',
app: 'server'
})
prometheusClient.collectDefaultMetrics()
initKnexPrometheusMetrics()
}

3372
packages/webhook-service/package-lock.json сгенерированный

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -24,7 +24,8 @@
"knex": "^1.0.3",
"node-fetch": "^2.6.1",
"pg": "^8.6.0",
"private-ip": "^2.3.3"
"private-ip": "^2.3.3",
"prom-client": "^14.0.1"
},
"devDependencies": {
"cross-env": "^7.0.3",

Просмотреть файл

@ -3,6 +3,7 @@
const crypto = require('crypto')
const knex = require('./knex')
const fs = require('fs')
const metrics = require('./prometheusMetrics')
let shouldExit = false
const HEALTHCHECK_FILE_PATH = '/tmp/last_successful_query'
@ -94,6 +95,7 @@ async function doTask(task) {
`,
[err.toString(), task.id]
)
metrics.metricOperationErrors.labels('webhook').inc()
}
}
@ -112,11 +114,16 @@ async function tick() {
return
}
const metricDurationEnd = metrics.metricDuration.startTimer()
await doTask(task)
metricDurationEnd({ op: 'webhook' })
// Check for another task very soon
setTimeout(tick, 10)
} catch (err) {
metrics.metricOperationErrors.labels('main_loop').inc()
console.log('Error executing task: ', err)
setTimeout(tick, 5000)
}
@ -129,6 +136,7 @@ async function main() {
shouldExit = true
console.log('Shutting down...')
})
metrics.initPrometheusMetrics()
tick()
}

Просмотреть файл

@ -0,0 +1,112 @@
/* eslint-disable no-unused-vars */
'use strict'
const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('./knex')
// Metric handles for the knex connection pool and query statistics.
// They stay null until initKnexPrometheusMetrics() creates them.
let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null
// Start timestamp (Date.now(), in ms) of each in-flight query, keyed by its knex query uid.
const queryStartTime = {}
// Empty the default registry, attach project/app labels to every metric,
// then register prom-client's default metrics.
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
project: 'speckle-server',
app: 'webhook-service'
})
prometheusClient.collectDefaultMetrics()
// Set to true by initPrometheusMetrics() so its setup runs only once.
let prometheusInitialized = false
/**
 * Registers the knex metrics for this service and hooks them up to knex:
 * pool gauges are sampled on collection, while query duration and query
 * error metrics are updated from knex's query lifecycle events.
 */
function initKnexPrometheusMetrics() {
  // Gauge factory: `poolMethod` names the knex pool method that supplies the value
  const gaugeFromPool = (name, help, poolMethod) =>
    new prometheusClient.Gauge({
      name,
      help,
      collect() {
        this.set(knex.client.pool[poolMethod]())
      }
    })

  metricFree = gaugeFromPool(
    'speckle_server_knex_free',
    'Number of free DB connections',
    'numFree'
  )
  metricUsed = gaugeFromPool(
    'speckle_server_knex_used',
    'Number of used DB connections',
    'numUsed'
  )
  metricPendingAquires = gaugeFromPool(
    'speckle_server_knex_pending',
    'Number of pending DB connection aquires',
    'numPendingAcquires'
  )

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })
  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  // Returns the elapsed seconds for a query uid (NaN when its start was never
  // recorded) and removes the stored start time.
  const takeElapsedSeconds = (rawUid) => {
    const uid = String(rawUid)
    const startedAt = queryStartTime[uid]
    const now = Date.now()
    delete queryStartTime[uid]
    return (now - startedAt) / 1000
  }

  const observeIfKnown = (seconds) => {
    if (!Number.isNaN(seconds)) metricQueryDuration.observe(seconds)
  }

  knex.on('query', (started) => {
    queryStartTime[String(started.__knexQueryUid)] = Date.now()
  })

  knex.on('query-response', (_result, finished) => {
    observeIfKnown(takeElapsedSeconds(finished.__knexQueryUid))
  })

  knex.on('query-error', (_failure, failed) => {
    observeIfKnown(takeElapsedSeconds(failed.__knexQueryUid))
    metricQueryErrors.inc()
  })
}
module.exports = {
initPrometheusMetrics() {
if (prometheusInitialized) return
prometheusInitialized = true
initKnexPrometheusMetrics()
// Define the HTTP server
const server = http.createServer(async (req, res) => {
if (req.url === '/metrics') {
res.setHeader('Content-Type', prometheusClient.register.contentType)
res.end(await prometheusClient.register.metrics())
} else {
res.end('Speckle Webhook Service - prometheus metrics')
}
})
server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9095)
},
metricDuration: new prometheusClient.Histogram({
name: 'speckle_server_operation_duration',
help: 'Summary of the operation durations in seconds',
buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
labelNames: ['op']
}),
metricOperationErrors: new prometheusClient.Counter({
name: 'speckle_server_operation_errors',
help: 'Number of operations with errors',
labelNames: ['op']
})
}

50
utils/helm/Makefile Normal file
Просмотреть файл

@ -0,0 +1,50 @@
# This file is only useful to test the helm chart locally with minikube before committing changes

# None of the targets produce a file of the same name. Declaring them phony keeps
# them runnable even if a file or directory called "build" or "install" appears here.
.PHONY: build install init-local-minikube

# Build every image from the repository root with the "local" tag, then copy them into minikube
build:
	cd ../.. && docker build -t speckle/speckle-frontend:local -f packages/frontend/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-server:local -f packages/server/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-preview-service:local -f packages/preview-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-webhook-service:local -f packages/webhook-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-fileimport-service:local -f packages/fileimport-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-monitor-deployment:local -f utils/monitor-deployment/Dockerfile .

	echo "Making locally built images available inside minikube cluster. This takes a bit to copy, unfortunately..."
	minikube image load speckle/speckle-frontend:local
	minikube image load speckle/speckle-server:local
	minikube image load speckle/speckle-preview-service:local
	minikube image load speckle/speckle-webhook-service:local
	minikube image load speckle/speckle-fileimport-service:local
	minikube image load speckle/speckle-monitor-deployment:local

# (Re)install the chart as release "speckle-test" using test-values.yml.
# The uninstall is allowed to fail so the target also works when no release exists yet.
install:
	helm uninstall speckle-test 2>/dev/null || true
	helm install -f test-values.yml speckle-test ./speckle-server

	@echo Make sure you add these lines in /etc/hosts:
	@echo \# ---------------------
	@echo `minikube ip` speckle.minikube
	@echo `minikube ip` grafana.minikube
	@echo `minikube ip` prometheus.minikube
	@echo \# ---------------------
	@echo Installed successfully.
	@echo You can access the prometheus server at http://prometheus.minikube/
	@echo You can access grafana at http://grafana.minikube/ \(user: admin, password: prom-operator\)
	@echo You can access the Speckle server at http://speckle.minikube/

# If you have an existing minikube, you can `minikube delete` and `minikube start` to start a fresh cluster
# This "make target" will install all requirements in a fresh cluster
init-local-minikube:
	@(kubectl config current-context | grep -q minikube) || (echo "ERROR: Minikube is not the current kubectl context. Temporarily modify the makefile if you really want to use the current configured kubectl context" && exit 1)

	@echo "Enabling nginx ingress minikube plugin..."
	minikube addons enable ingress

	@echo "Installing kube-prometheus-stack"
	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
	helm repo update
	helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack

	kubectl apply -f test-init.yml

Просмотреть файл

@ -8,7 +8,7 @@ type: application
# Versions are expected to follow Semantic Versioning (https://semver.org/)
# Set by the build process to the correct value
# version: 0.1.0
version: 0.1.0-local
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
@ -16,4 +16,4 @@ type: application
# It is recommended to use it with quotes.
# Set by the build process to the correct value
# appVersion: "2.3.3"
appVersion: '0.1.0-local'

Просмотреть файл

@ -77,7 +77,11 @@ spec:
env:
- name: CANONICAL_URL
{{- if .Values.ssl_canonical_url }}
value: https://{{ .Values.domain }}
{{- else }}
value: http://{{ .Values.domain }}
{{- end }}
- name: PORT
value: "3000"

Просмотреть файл

@ -0,0 +1,61 @@
# Single-replica Deployment running the speckle-monitor-deployment image.
# The container receives the database URL from the chart's secret.
apiVersion: apps/v1
kind: Deployment
metadata:
name: speckle-monitoring
namespace: {{ .Values.namespace }}
labels:
app: speckle-monitoring
project: speckle-server
spec:
replicas: 1
selector:
matchLabels:
app: speckle-monitoring
project: speckle-server
template:
metadata:
labels:
app: speckle-monitoring
project: speckle-server
spec:
priorityClassName: low-priority
{{- if .Values.db.useCertificate }}
# Only when db.useCertificate is set: expose the postgres-certificate ConfigMap as a volume
volumes:
- name: postgres-certificate
configMap:
name: postgres-certificate
{{- end }}
terminationGracePeriodSeconds: 10
containers:
- name: main
image: speckle/speckle-monitor-deployment:{{ .Values.docker_image_tag }}
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 200m
memory: 512Mi
{{- if .Values.db.useCertificate }}
volumeMounts:
- name: postgres-certificate
mountPath: /postgres-certificate
{{- end }}
env:
# Connection string is read from the "postgres_url" key of the chart's secret
- name: PG_CONNECTION_STRING
valueFrom:
secretKeyRef:
name: {{ .Values.secretName }}
key: postgres_url
{{- if .Values.db.useCertificate }}
# NOTE(review): NODE_EXTRA_CA_CERTS is a Node.js variable, but this image's Dockerfile
# runs a Python script. Confirm the CA certificate is actually picked up by the monitor.
- name: NODE_EXTRA_CA_CERTS
value: "/postgres-certificate/ca-certificate.crt"
{{- end }}

Просмотреть файл

@ -4,16 +4,20 @@ metadata:
name: speckle-server
namespace: {{ .Values.namespace }}
annotations:
{{- if .Values.cert_manager_issuer }}
cert-manager.io/cluster-issuer: {{ .Values.cert_manager_issuer }}
{{- end }}
nginx.ingress.kubernetes.io/proxy-body-size: "100m"
nginx.org/client-max-body-size: "100m"
nginx.ingress.kubernetes.io/use-regex: "true"
spec:
ingressClassName: nginx
{{- if .Values.cert_manager_issuer }}
tls:
- hosts:
- {{ .Values.domain }}
secretName: server-tls
{{- end }}
rules:
- host: {{ .Values.domain }}
http:

Просмотреть файл

@ -33,3 +33,75 @@ spec:
name: www
port: 80
targetPort: 80
---
# Exposes the preview-service pods' metrics port (9094)
apiVersion: v1
kind: Service
metadata:
name: speckle-preview-service-metrics
namespace: {{ .Values.namespace }}
labels:
app: speckle-preview-service-metrics
project: speckle-server
spec:
selector:
app: speckle-preview-service
project: speckle-server
ports:
- protocol: TCP
name: web
port: 9094
targetPort: 9094
---
# Exposes the fileimport-service pods' metrics port (9093)
apiVersion: v1
kind: Service
metadata:
name: speckle-fileimport-service-metrics
namespace: {{ .Values.namespace }}
labels:
app: speckle-fileimport-service-metrics
project: speckle-server
spec:
selector:
app: speckle-fileimport-service
project: speckle-server
ports:
- protocol: TCP
name: web
port: 9093
targetPort: 9093
---
# Exposes the webhook-service pods' metrics port (9095)
apiVersion: v1
kind: Service
metadata:
name: speckle-webhook-service-metrics
namespace: {{ .Values.namespace }}
labels:
# NOTE(review): the preview and fileimport metrics services label themselves with a
# "-metrics" suffix, this one does not. Confirm which form the service monitor selects.
app: speckle-webhook-service
project: speckle-server
spec:
selector:
app: speckle-webhook-service
project: speckle-server
ports:
- protocol: TCP
name: web
port: 9095
targetPort: 9095
---
# Exposes the speckle-monitoring pod's metrics port (9092)
apiVersion: v1
kind: Service
metadata:
name: speckle-monitoring-metrics
namespace: {{ .Values.namespace }}
labels:
app: speckle-monitoring
project: speckle-server
spec:
selector:
app: speckle-monitoring
project: speckle-server
ports:
- protocol: TCP
name: web
port: 9092
targetPort: 9092

Просмотреть файл

@ -1,6 +1,7 @@
namespace: speckle-test
domain: localhost
ssl_canonical_url: true
docker_image_tag: v2.3.3

Просмотреть файл

@ -1,3 +1,27 @@
# Priority classes for the local test cluster: high (100), medium (50) and low (-100).
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: high-priority
value: 100
globalDefault: false
description: 'High priority (100) for business-critical services'
---
# medium-priority is the only class marked as the global default
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: medium-priority
value: 50
globalDefault: true
description: 'Medium priority (50) - dev/test services'
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
name: low-priority
value: -100
globalDefault: false
description: 'Low priority (-100) - Non-critical microservices'
---
apiVersion: v1
kind: Namespace
metadata:
@ -163,6 +187,42 @@ spec:
port: 9001
targetPort: 9001
---
# Routes http://grafana.minikube/ to the kube-prometheus-stack-grafana service on port 80.
# No namespace is set, so the ingress lands in whatever namespace kubectl applies it to.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: grafana
spec:
ingressClassName: nginx
rules:
- host: grafana.minikube
http:
paths:
- pathType: Prefix
path: '/'
backend:
service:
name: kube-prometheus-stack-grafana
port:
number: 80
---
# Routes http://prometheus.minikube/ to the prometheus-operated service on port 9090
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: prometheus
spec:
ingressClassName: nginx
rules:
- host: prometheus.minikube
http:
paths:
- pathType: Prefix
path: '/'
backend:
service:
name: prometheus-operated
port:
number: 9090
---
apiVersion: v1
kind: Secret
metadata:

Просмотреть файл

@ -1,6 +1,7 @@
namespace: speckle-test
domain: myspeckleserver
domain: speckle.minikube
ssl_canonical_url: false
docker_image_tag: 'local'
@ -13,4 +14,5 @@ s3:
access_key: 'minioadmin'
create_bucket: 'true'
cert_manager_issuer: letsencrypt-staging
cert_manager_issuer: ~
enable_prometheus_monitoring: true

Просмотреть файл

@ -0,0 +1,8 @@
# Image for the deployment monitor: a Python script that needs a postgres driver
# and the prometheus client library.
FROM python:3.8-slim

# --no-cache-dir keeps pip's download cache out of the image layer
RUN pip install --no-cache-dir psycopg2-binary prometheus-client

# The build context is the repository root, hence the full source path
COPY utils/monitor-deployment/src /app
WORKDIR /app

# -u: unbuffered output, so log lines are written as they are produced
CMD ["python", "-u", "run.py"]

Просмотреть файл

@ -0,0 +1,4 @@
#!/bin/bash
# Runs the deployment monitor locally against a database on localhost.
export PG_CONNECTION_STRING=postgres://speckle:speckle@localhost/speckle
# Resolve src/ relative to this script so it can be started from any working directory
cd "$(dirname "$0")/src" && python3 -u run.py

Просмотреть файл

@ -0,0 +1,105 @@
#!/usr/bin/env python
import os
import psycopg2
from prometheus_client import start_http_server, Gauge
import time
import logging
LOG = logging.getLogger(__name__)
# Required: the process fails at import time with KeyError when the variable is missing
PG_CONNECTION_STRING = os.environ['PG_CONNECTION_STRING']
# All gauges exported by this process, keyed by the short name that tick() uses.
PROM = {
'db_size': Gauge('speckle_db_size', 'Size of the entire database (in bytes)'),
'objects': Gauge('speckle_db_objects', 'Number of objects'),
'streams': Gauge('speckle_db_streams', 'Number of streams'),
'commits': Gauge('speckle_db_commits', 'Number of commits'),
'users': Gauge('speckle_db_users', 'Number of users'),
# The gauges below are labelled; tick() sets one series per group returned by its query
'fileimports': Gauge('speckle_db_fileimports', 'Number of imported files, by type and status', labelnames=('filetype','status')),
'webhooks': Gauge('speckle_db_webhooks', 'Number of webhook calls, by status', labelnames=('status',)),
'previews': Gauge('speckle_db_previews', 'Number of previews, by status', labelnames=('status',)),
'filesize': Gauge('speckle_db_filesize', 'Size of imported files, by type (in bytes)', labelnames=('filetype',)),
}
def tick(cur):
    """Refresh every gauge in PROM from the current database contents.

    :param cur: an open database cursor. The database measured by the size
        query is the one its connection is attached to.

    Any database error propagates to the caller.
    """
    # Total DB size
    cur.execute('SELECT pg_database_size(%s)', (cur.connection.info.dbname,))
    PROM['db_size'].set(cur.fetchone()[0])

    # Counts for users, streams, commits, objects
    for gauge_name, query in (
        ('objects', "SELECT count(*) FROM objects;"),
        ('streams', "SELECT count(*) FROM streams;"),
        ('commits', "SELECT count(*) FROM commits;"),
        ('users', "SELECT count(*) FROM users;"),
    ):
        cur.execute(query)
        PROM[gauge_name].set(cur.fetchone()[0])

    # NOTE(review): the labelled gauges below are only set for groups the query
    # returns. A group that disappears keeps its last exported value.

    # File Imports
    cur.execute(
        '''
        SELECT "fileType", "convertedStatus", count(*)
        FROM file_uploads
        GROUP BY ("fileType", "convertedStatus")
        '''
    )
    for row in cur:
        PROM['fileimports'].labels(row[0], str(row[1])).set(row[2])

    cur.execute(
        '''
        SELECT "fileType", SUM("fileSize")
        FROM file_uploads
        GROUP BY "fileType"
        '''
    )
    for row in cur:
        # SUM() is NULL when no row of the group has a fileSize. Report 0 instead
        # of passing None to the gauge, which would raise and abort the whole tick.
        PROM['filesize'].labels(row[0]).set(row[1] or 0)

    # Webhooks
    cur.execute(
        '''
        SELECT status, count(*)
        FROM webhooks_events
        GROUP BY status
        '''
    )
    for row in cur:
        PROM['webhooks'].labels(str(row[0])).set(row[1])

    # Previews
    cur.execute(
        '''
        SELECT "previewStatus", count(*)
        FROM object_preview
        GROUP BY "previewStatus"
        '''
    )
    for row in cur:
        PROM['previews'].labels(str(row[0])).set(row[1])
def main():
    """Serve the gauges over HTTP on port 9092 and refresh them every 60 seconds.

    Runs until the process is stopped. Each refresh uses a fresh database
    connection; a failed refresh is logged and retried on the next cycle.
    """
    start_http_server(9092)

    while True:
        try:
            conn = psycopg2.connect(PG_CONNECTION_STRING)
            try:
                # The cursor is closed when the with-block exits
                with conn.cursor() as cur:
                    tick(cur)
            finally:
                conn.close()
        except Exception as ex:
            # Cleanup failures land here too, so a broken connection cannot end
            # the loop. exc_info keeps the exception type and traceback in the log.
            LOG.error("Error: %s", str(ex), exc_info=True)

        time.sleep(60)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()