monitoring microservices; new container for overall monitoring of a deployment; helm update; helm local testing
Parent: 753b38abb4
Commit: 0a0fe3cb77
@@ -72,6 +72,7 @@ workflows:
            - docker-build-and-publish-webhooks
            - docker-build-and-publish-file-imports
            - docker-build-and-publish-previews
            - docker-build-and-publish-monitor-container
            - docker-build-and-publish-test-container

      - publish-npm:
@@ -194,6 +195,12 @@ jobs:
      FOLDER: utils
      SPECKLE_SERVER_PACKAGE: test-deployment

  docker-build-and-publish-monitor-container:
    <<: *docker-job
    environment:
      FOLDER: utils
      SPECKLE_SERVER_PACKAGE: monitor-deployment

  publish-npm:
    docker: *docker-image
    working_directory: *work-dir

@@ -27,8 +27,8 @@ fi
rm -rf ~/helm/charts/speckle-server
cp -r utils/helm/speckle-server ~/helm/charts/speckle-server

echo 'version: '$RELEASE_VERSION >> ~/helm/charts/speckle-server/Chart.yaml
echo 'appVersion: "'$RELEASE_VERSION'"' >> ~/helm/charts/speckle-server/Chart.yaml
sed -i 's/version: [^\s]*/version: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/Chart.yaml
sed -i 's/appVersion: [^\s]*/appVersion: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/Chart.yaml

sed -i 's/docker_image_tag: [^\s]*/docker_image_tag: '$RELEASE_VERSION'/g' ~/helm/charts/speckle-server/values.yaml

@@ -0,0 +1,16 @@
{
  // Use IntelliSense to learn about possible attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  "version": "0.2.0",
  "configurations": [
    {
      "name": "Launch via NPM",
      "request": "launch",
      "runtimeArgs": ["run-script", "dev"],
      "runtimeExecutable": "npm",
      "skipFiles": ["<node_internals>/**"],
      "type": "node"
    }
  ]
}

@@ -15,6 +15,7 @@
    "knex": "^1.0.3",
    "node-fetch": "^2.6.5",
    "pg": "^8.7.1",
    "prom-client": "^14.0.1",
    "valid-filename": "^3.1.0",
    "web-ifc": "^0.0.33"
  },
@@ -295,6 +296,11 @@
        "node": ">=8"
      }
    },
    "node_modules/bintrees": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz",
      "integrity": "sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ="
    },
    "node_modules/boxen": {
      "version": "5.1.2",
      "resolved": "https://registry.npmjs.org/boxen/-/boxen-5.1.2.tgz",
@@ -2126,6 +2132,17 @@
        "url": "https://github.com/prettier/prettier?sponsor=1"
      }
    },
    "node_modules/prom-client": {
      "version": "14.0.1",
      "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-14.0.1.tgz",
      "integrity": "sha512-HxTArb6fkOntQHoRGvv4qd/BkorjliiuO2uSWC2KC17MUTKYttWdDoXX/vxOhQdkoECEM9BBH0pj2l8G8kev6w==",
      "dependencies": {
        "tdigest": "^0.1.1"
      },
      "engines": {
        "node": ">=10"
      }
    },
    "node_modules/pstree.remy": {
      "version": "1.1.8",
      "resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
@@ -2511,6 +2528,14 @@
        "node": ">=8.0.0"
      }
    },
    "node_modules/tdigest": {
      "version": "0.1.1",
      "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.1.tgz",
      "integrity": "sha1-Ljyyw56kSeVdHmzZEReszKRYgCE=",
      "dependencies": {
        "bintrees": "1.0.1"
      }
    },
    "node_modules/text-table": {
      "version": "0.2.0",
      "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",
@@ -3063,6 +3088,11 @@
      "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
      "dev": true
    },
    "bintrees": {
      "version": "1.0.1",
      "resolved": "https://registry.npmjs.org/bintrees/-/bintrees-1.0.1.tgz",
      "integrity": "sha1-DmVcm5wkNeqraL9AJyJtK1WjRSQ="
    },
    "boxen": {
      "version": "5.1.2",
      "resolved": "https://registry.npmjs.org/boxen/-/boxen-5.1.2.tgz",
@@ -4400,6 +4430,14 @@
      "integrity": "sha512-m2FgJibYrBGGgQXNzfd0PuDGShJgRavjUoRCw1mZERIWVSXF0iLzLm+aOqTAbLnC3n6JzUhAA8uZnFVghHJ86A==",
      "dev": true
    },
    "prom-client": {
      "version": "14.0.1",
      "resolved": "https://registry.npmjs.org/prom-client/-/prom-client-14.0.1.tgz",
      "integrity": "sha512-HxTArb6fkOntQHoRGvv4qd/BkorjliiuO2uSWC2KC17MUTKYttWdDoXX/vxOhQdkoECEM9BBH0pj2l8G8kev6w==",
      "requires": {
        "tdigest": "^0.1.1"
      }
    },
    "pstree.remy": {
      "version": "1.1.8",
      "resolved": "https://registry.npmjs.org/pstree.remy/-/pstree.remy-1.1.8.tgz",
@@ -4675,6 +4713,14 @@
      "resolved": "https://registry.npmjs.org/tarn/-/tarn-3.0.2.tgz",
      "integrity": "sha512-51LAVKUSZSVfI05vjPESNc5vwqqZpbXCsU+/+wxlOrUjk2SnFTt97v9ZgQrD4YmxYW1Px6w2KjaDitCfkvgxMQ=="
    },
    "tdigest": {
      "version": "0.1.1",
      "resolved": "https://registry.npmjs.org/tdigest/-/tdigest-0.1.1.tgz",
      "integrity": "sha1-Ljyyw56kSeVdHmzZEReszKRYgCE=",
      "requires": {
        "bintrees": "1.0.1"
      }
    },
    "text-table": {
      "version": "0.2.0",
      "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz",

@@ -28,6 +28,7 @@
    "knex": "^1.0.3",
    "node-fetch": "^2.6.5",
    "pg": "^8.7.1",
    "prom-client": "^14.0.1",
    "valid-filename": "^3.1.0",
    "web-ifc": "^0.0.33"
  },

@@ -1,6 +1,12 @@
/* eslint-disable no-console */
'use strict'

const {
  initPrometheusMetrics,
  metricDuration,
  metricInputFileSize,
  metricOperationErrors
} = require('./prometheusMetrics')
const knex = require('../knex')

const { getFileStream } = require('./filesApi')
@@ -39,13 +45,16 @@ async function startTask() {
async function doTask(task) {
  let tempUserToken = null
  let serverApi = null
  let fileTypeForMetric = 'unknown'
  let fileSizeForMetric = 0

  const metricDurationEnd = metricDuration.startTimer()
  try {
    console.log('Doing task ', task)
    const { rows } = await knex.raw(
      `
      SELECT
        id as "fileId", "streamId", "branchName", "userId", "fileName", "fileType"
        id as "fileId", "streamId", "branchName", "userId", "fileName", "fileType", "fileSize"
      FROM file_uploads
      WHERE id = ?
      LIMIT 1
@@ -56,6 +65,8 @@ async function doTask(task) {
    if (!info) {
      throw new Error('Internal error: DB inconsistent')
    }
    fileTypeForMetric = info.fileType || 'missing_info'
    fileSizeForMetric = Number(info.fileSize) || 0

    fs.mkdirSync(TMP_INPUT_DIR, { recursive: true })

@@ -165,7 +176,10 @@ async function doTask(task) {
      `,
      [err.toString(), task.id]
    )
    metricOperationErrors.labels(fileTypeForMetric).inc()
  }
  metricDurationEnd({ op: fileTypeForMetric })
  metricInputFileSize.labels(fileTypeForMetric).observe(fileSizeForMetric)

  fs.rmSync(TMP_INPUT_DIR, { force: true, recursive: true })
  if (fs.existsSync(TMP_RESULTS_PATH)) fs.unlinkSync(TMP_RESULTS_PATH)
@@ -234,6 +248,7 @@ async function tick() {
    // Check for another task very soon
    setTimeout(tick, 10)
  } catch (err) {
    metricOperationErrors.labels('main_loop').inc()
    console.log('Error executing task: ', err)
    setTimeout(tick, 5000)
  }
@@ -241,6 +256,7 @@ async function tick() {

async function main() {
  console.log('Starting FileUploads Service...')
  initPrometheusMetrics()

  process.on('SIGTERM', () => {
    shouldExit = true

@@ -0,0 +1,127 @@
/* eslint-disable no-unused-vars */
'use strict'

const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('../knex')

let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null

const queryStartTime = {}
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
  project: 'speckle-server',
  app: 'fileimport-service'
})
prometheusClient.collectDefaultMetrics()

let prometheusInitialized = false

function initKnexPrometheusMetrics() {
  metricFree = new prometheusClient.Gauge({
    name: 'speckle_server_knex_free',
    help: 'Number of free DB connections',
    collect() {
      this.set(knex.client.pool.numFree())
    }
  })

  metricUsed = new prometheusClient.Gauge({
    name: 'speckle_server_knex_used',
    help: 'Number of used DB connections',
    collect() {
      this.set(knex.client.pool.numUsed())
    }
  })

  metricPendingAquires = new prometheusClient.Gauge({
    name: 'speckle_server_knex_pending',
    help: 'Number of pending DB connection aquires',
    collect() {
      this.set(knex.client.pool.numPendingAcquires())
    }
  })

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })

  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  knex.on('query', (data) => {
    const queryId = data.__knexQueryUid + ''
    queryStartTime[queryId] = Date.now()
  })

  knex.on('query-response', (data, obj, builder) => {
    const queryId = obj.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]
    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
  })

  knex.on('query-error', (err, querySpec) => {
    const queryId = querySpec.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]

    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
    metricQueryErrors.inc()
  })
}

module.exports = {
  initPrometheusMetrics() {
    if (prometheusInitialized) return
    prometheusInitialized = true

    initKnexPrometheusMetrics()

    // Define the HTTP server
    const server = http.createServer(async (req, res) => {
      if (req.url === '/metrics') {
        res.setHeader('Content-Type', prometheusClient.register.contentType)
        res.end(await prometheusClient.register.metrics())
      } else {
        res.end('Speckle FileImport Service - prometheus metrics')
      }
    })
    server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9093)
  },

  metricDuration: new prometheusClient.Histogram({
    name: 'speckle_server_operation_duration',
    help: 'Summary of the operation durations in seconds',
    buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
    labelNames: ['op']
  }),

  metricOperationErrors: new prometheusClient.Counter({
    name: 'speckle_server_operation_errors',
    help: 'Number of operations with errors',
    labelNames: ['op']
  }),

  metricInputFileSize: new prometheusClient.Histogram({
    name: 'speckle_server_operation_file_size',
    help: 'Size of the operation input file size',
    buckets: [
      1000,
      100 * 1000,
      500 * 1000,
      1000 * 1000,
      5 * 1000 * 1000,
      10 * 1000 * 1000,
      100 * 1000 * 1000
    ],
    labelNames: ['op']
  })
}
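The preview and webhook services below reuse this same module pattern, differing only in the app label and default port. A minimal local smoke test, assuming the fileimport service is running on this machine with PROMETHEUS_METRICS_PORT unset (so the default 9093 applies):

# verify that the knex pool gauges and operation metrics are exported
curl -s http://localhost:9093/metrics | grep -E 'speckle_server_(knex|operation)'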
@@ -10,10 +10,6 @@ const indexRouter = require('./routes/index')
const previewRouter = require('./routes/preview')
const objectsRouter = require('./routes/objects')
const apiRouter = require('./routes/api')
const prometheusClient = require('prom-client')

prometheusClient.register.clear()
prometheusClient.collectDefaultMetrics()

const app = express()

@@ -29,16 +25,6 @@ app.use('/preview', previewRouter)
app.use('/objects', objectsRouter)
app.use('/api', apiRouter)

// Expose prometheus metrics
app.get('/metrics', async (req, res) => {
  try {
    res.set('Content-Type', prometheusClient.register.contentType)
    res.end(await prometheusClient.register.metrics())
  } catch (ex) {
    res.status(500).end(ex.message)
  }
})

// catch 404 and forward to error handler
app.use(function (req, res, next) {
  next(createError(404))

@@ -4,6 +4,7 @@ const crypto = require('crypto')
const knex = require('../knex')
const fetch = require('node-fetch')
const fs = require('fs')
const metrics = require('./prometheusMetrics')

let shouldExit = false

@@ -78,6 +79,7 @@ async function doTask(task) {
      `,
      [{}, task.streamId, task.objectId]
    )
    metrics.metricOperationErrors.labels('preview').inc()
  }
}

@@ -96,11 +98,16 @@ async function tick() {
      return
    }

    const metricDurationEnd = metrics.metricDuration.startTimer()

    await doTask(task)

    metricDurationEnd({ op: 'preview' })

    // Check for another task very soon
    setTimeout(tick, 10)
  } catch (err) {
    metrics.metricOperationErrors.labels('main_loop').inc()
    console.log('Error executing task: ', err)
    setTimeout(tick, 5000)
  }
@@ -119,6 +126,8 @@ async function startPreviewService() {
    console.log('Shutting down...')
  })

  metrics.initPrometheusMetrics()

  tick()
}

@@ -0,0 +1,112 @@
/* eslint-disable no-unused-vars */
'use strict'

const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('../knex')

let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null

const queryStartTime = {}
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
  project: 'speckle-server',
  app: 'preview-service'
})
prometheusClient.collectDefaultMetrics()

let prometheusInitialized = false

function initKnexPrometheusMetrics() {
  metricFree = new prometheusClient.Gauge({
    name: 'speckle_server_knex_free',
    help: 'Number of free DB connections',
    collect() {
      this.set(knex.client.pool.numFree())
    }
  })

  metricUsed = new prometheusClient.Gauge({
    name: 'speckle_server_knex_used',
    help: 'Number of used DB connections',
    collect() {
      this.set(knex.client.pool.numUsed())
    }
  })

  metricPendingAquires = new prometheusClient.Gauge({
    name: 'speckle_server_knex_pending',
    help: 'Number of pending DB connection aquires',
    collect() {
      this.set(knex.client.pool.numPendingAcquires())
    }
  })

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })

  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  knex.on('query', (data) => {
    const queryId = data.__knexQueryUid + ''
    queryStartTime[queryId] = Date.now()
  })

  knex.on('query-response', (data, obj, builder) => {
    const queryId = obj.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]
    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
  })

  knex.on('query-error', (err, querySpec) => {
    const queryId = querySpec.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]

    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
    metricQueryErrors.inc()
  })
}

module.exports = {
  initPrometheusMetrics() {
    if (prometheusInitialized) return
    prometheusInitialized = true

    initKnexPrometheusMetrics()

    // Define the HTTP server
    const server = http.createServer(async (req, res) => {
      if (req.url === '/metrics') {
        res.setHeader('Content-Type', prometheusClient.register.contentType)
        res.end(await prometheusClient.register.metrics())
      } else {
        res.end('Speckle Preview Service - prometheus metrics')
      }
    })
    server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9094)
  },

  metricDuration: new prometheusClient.Histogram({
    name: 'speckle_server_operation_duration',
    help: 'Summary of the operation durations in seconds',
    buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
    labelNames: ['op']
  }),

  metricOperationErrors: new prometheusClient.Counter({
    name: 'speckle_server_operation_errors',
    help: 'Number of operations with errors',
    labelNames: ['op']
  })
}
@@ -16,8 +16,10 @@ module.exports = {
      labelNames: ['route']
    })
  }

  return responseTime(function (req, res, time) {
    let route = 'unknown'
    if (req.originalUrl === '/graphql') route = '/graphql'
    if (req.route && req.route.path) route = req.route.path
    metricRequestDuration.labels(route).observe(time / 1000)
  })

@@ -15,7 +15,12 @@ module.exports = function (app) {
  if (!prometheusInitialized) {
    prometheusInitialized = true
    prometheusClient.register.clear()
    prometheusClient.register.setDefaultLabels({
      project: 'speckle-server',
      app: 'server'
    })
    prometheusClient.collectDefaultMetrics()

    initKnexPrometheusMetrics()
  }

(File diff not shown because it is too large)
@@ -24,7 +24,8 @@
    "knex": "^1.0.3",
    "node-fetch": "^2.6.1",
    "pg": "^8.6.0",
    "private-ip": "^2.3.3"
    "private-ip": "^2.3.3",
    "prom-client": "^14.0.1"
  },
  "devDependencies": {
    "cross-env": "^7.0.3",

@@ -3,6 +3,7 @@
const crypto = require('crypto')
const knex = require('./knex')
const fs = require('fs')
const metrics = require('./prometheusMetrics')

let shouldExit = false
const HEALTHCHECK_FILE_PATH = '/tmp/last_successful_query'
@@ -94,6 +95,7 @@ async function doTask(task) {
      `,
      [err.toString(), task.id]
    )
    metrics.metricOperationErrors.labels('webhook').inc()
  }
}

@@ -112,11 +114,16 @@ async function tick() {
      return
    }

    const metricDurationEnd = metrics.metricDuration.startTimer()

    await doTask(task)

    metricDurationEnd({ op: 'webhook' })

    // Check for another task very soon
    setTimeout(tick, 10)
  } catch (err) {
    metrics.metricOperationErrors.labels('main_loop').inc()
    console.log('Error executing task: ', err)
    setTimeout(tick, 5000)
  }
@@ -129,6 +136,7 @@ async function main() {
    shouldExit = true
    console.log('Shutting down...')
  })
  metrics.initPrometheusMetrics()

  tick()
}

@@ -0,0 +1,112 @@
/* eslint-disable no-unused-vars */
'use strict'

const http = require('http')
const prometheusClient = require('prom-client')
const knex = require('./knex')

let metricFree = null
let metricUsed = null
let metricPendingAquires = null
let metricQueryDuration = null
let metricQueryErrors = null

const queryStartTime = {}
prometheusClient.register.clear()
prometheusClient.register.setDefaultLabels({
  project: 'speckle-server',
  app: 'webhook-service'
})
prometheusClient.collectDefaultMetrics()

let prometheusInitialized = false

function initKnexPrometheusMetrics() {
  metricFree = new prometheusClient.Gauge({
    name: 'speckle_server_knex_free',
    help: 'Number of free DB connections',
    collect() {
      this.set(knex.client.pool.numFree())
    }
  })

  metricUsed = new prometheusClient.Gauge({
    name: 'speckle_server_knex_used',
    help: 'Number of used DB connections',
    collect() {
      this.set(knex.client.pool.numUsed())
    }
  })

  metricPendingAquires = new prometheusClient.Gauge({
    name: 'speckle_server_knex_pending',
    help: 'Number of pending DB connection aquires',
    collect() {
      this.set(knex.client.pool.numPendingAcquires())
    }
  })

  metricQueryDuration = new prometheusClient.Summary({
    name: 'speckle_server_knex_query_duration',
    help: 'Summary of the DB query durations in seconds'
  })

  metricQueryErrors = new prometheusClient.Counter({
    name: 'speckle_server_knex_query_errors',
    help: 'Number of DB queries with errors'
  })

  knex.on('query', (data) => {
    const queryId = data.__knexQueryUid + ''
    queryStartTime[queryId] = Date.now()
  })

  knex.on('query-response', (data, obj, builder) => {
    const queryId = obj.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]
    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
  })

  knex.on('query-error', (err, querySpec) => {
    const queryId = querySpec.__knexQueryUid + ''
    const durationSec = (Date.now() - queryStartTime[queryId]) / 1000
    delete queryStartTime[queryId]

    if (!isNaN(durationSec)) metricQueryDuration.observe(durationSec)
    metricQueryErrors.inc()
  })
}

module.exports = {
  initPrometheusMetrics() {
    if (prometheusInitialized) return
    prometheusInitialized = true

    initKnexPrometheusMetrics()

    // Define the HTTP server
    const server = http.createServer(async (req, res) => {
      if (req.url === '/metrics') {
        res.setHeader('Content-Type', prometheusClient.register.contentType)
        res.end(await prometheusClient.register.metrics())
      } else {
        res.end('Speckle Webhook Service - prometheus metrics')
      }
    })
    server.listen(Number(process.env.PROMETHEUS_METRICS_PORT) || 9095)
  },

  metricDuration: new prometheusClient.Histogram({
    name: 'speckle_server_operation_duration',
    help: 'Summary of the operation durations in seconds',
    buckets: [0.5, 1, 5, 10, 30, 60, 300, 600],
    labelNames: ['op']
  }),

  metricOperationErrors: new prometheusClient.Counter({
    name: 'speckle_server_operation_errors',
    help: 'Number of operations with errors',
    labelNames: ['op']
  })
}
@@ -0,0 +1,50 @@
# This file is only useful to test the helm chart locally with minikube before committing changes

build:
	cd ../.. && docker build -t speckle/speckle-frontend:local -f packages/frontend/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-server:local -f packages/server/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-preview-service:local -f packages/preview-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-webhook-service:local -f packages/webhook-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-fileimport-service:local -f packages/fileimport-service/Dockerfile .
	cd ../.. && docker build -t speckle/speckle-monitor-deployment:local -f utils/monitor-deployment/Dockerfile .

	echo "Making locally built images available inside minikube cluster. This takes a bit to copy, unfortunately..."

	minikube image load speckle/speckle-frontend:local
	minikube image load speckle/speckle-server:local
	minikube image load speckle/speckle-preview-service:local
	minikube image load speckle/speckle-webhook-service:local
	minikube image load speckle/speckle-fileimport-service:local
	minikube image load speckle/speckle-monitor-deployment:local


install:
	helm uninstall speckle-test 2>/dev/null || true
	helm install -f test-values.yml speckle-test ./speckle-server

	@echo Make sure you add these lines in /etc/hosts:
	@echo \# ---------------------
	@echo `minikube ip` speckle.minikube
	@echo `minikube ip` grafana.minikube
	@echo `minikube ip` prometheus.minikube
	@echo \# ---------------------
	@echo Installed successfully.
	@echo You can access the prometheus server at http://prometheus.minikube/
	@echo You can access grafana at http://grafana.minikube/ \(user: admin, password: prom-operator\)
	@echo You can access the Speckle server at http://speckle.minikube/


# If you have an existing minikube, you can `minikube delete` and `minikube start` to start a fresh cluster
# This "make target" will install all requirements in a fresh cluster
init-local-minikube:
	@(kubectl config current-context | grep -q minikube) || (echo "ERROR: Minikube is not the current kubectl context. Temporarily modify the makefile if you really want to use the current configured kubectl context" && exit 1)

	@echo "Enabling nginx ingress minikube plugin..."
	minikube addons enable ingress

	@echo "Installing kube-prometheus-stack"
	helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
	helm repo update
	helm install kube-prometheus-stack prometheus-community/kube-prometheus-stack

	kubectl apply -f test-init.yml
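A possible end-to-end local test loop with these targets, assuming they are run from the directory that contains this Makefile, test-values.yml and test-init.yml:

# one-time cluster prep: ingress addon, kube-prometheus-stack, and the test-init.yml resources
make init-local-minikube
# build every image with the :local tag and copy it into minikube
make build
# (re)install the chart as release "speckle-test" using test-values.yml
make install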
@@ -8,7 +8,7 @@ type: application
# Versions are expected to follow Semantic Versioning (https://semver.org/)

# Set by the build process to the correct value
# version: 0.1.0
version: 0.1.0-local

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
@@ -16,4 +16,4 @@ type: application
# It is recommended to use it with quotes.

# Set by the build process to the correct value
# appVersion: "2.3.3"
appVersion: '0.1.0-local'

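Before installing, the chart can also be rendered offline to catch template errors; a sketch, assuming it is run from the utils/helm directory that holds the chart and test-values.yml:

helm lint ./speckle-server -f test-values.yml
helm template speckle-test ./speckle-server -f test-values.yml > /tmp/rendered.yaml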
@@ -77,7 +77,11 @@ spec:

          env:
            - name: CANONICAL_URL
              {{- if .Values.ssl_canonical_url }}
              value: https://{{ .Values.domain }}
              {{- else }}
              value: http://{{ .Values.domain }}
              {{- end }}

            - name: PORT
              value: "3000"

@@ -0,0 +1,61 @@
apiVersion: apps/v1
kind: Deployment
metadata:
  name: speckle-monitoring
  namespace: {{ .Values.namespace }}
  labels:
    app: speckle-monitoring
    project: speckle-server
spec:
  replicas: 1
  selector:
    matchLabels:
      app: speckle-monitoring
      project: speckle-server
  template:
    metadata:
      labels:
        app: speckle-monitoring
        project: speckle-server
    spec:
      priorityClassName: low-priority

      {{- if .Values.db.useCertificate }}
      volumes:
        - name: postgres-certificate
          configMap:
            name: postgres-certificate
      {{- end }}

      terminationGracePeriodSeconds: 10

      containers:
        - name: main
          image: speckle/speckle-monitor-deployment:{{ .Values.docker_image_tag }}

          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 200m
              memory: 512Mi

          {{- if .Values.db.useCertificate }}
          volumeMounts:
            - name: postgres-certificate
              mountPath: /postgres-certificate
          {{- end }}

          env:
            - name: PG_CONNECTION_STRING
              valueFrom:
                secretKeyRef:
                  name: {{ .Values.secretName }}
                  key: postgres_url

            {{- if .Values.db.useCertificate }}
            - name: NODE_EXTRA_CA_CERTS
              value: "/postgres-certificate/ca-certificate.crt"
            {{- end }}

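Once the chart is installed, a quick check that the new monitoring container is running; a sketch, with the speckle-test namespace taken from the test values:

kubectl -n speckle-test get deployment speckle-monitoring
kubectl -n speckle-test logs deploy/speckle-monitoring --tail=20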
@@ -4,16 +4,20 @@ metadata:
  name: speckle-server
  namespace: {{ .Values.namespace }}
  annotations:
    {{- if .Values.cert_manager_issuer }}
    cert-manager.io/cluster-issuer: {{ .Values.cert_manager_issuer }}
    {{- end }}
    nginx.ingress.kubernetes.io/proxy-body-size: "100m"
    nginx.org/client-max-body-size: "100m"
    nginx.ingress.kubernetes.io/use-regex: "true"
spec:
  ingressClassName: nginx
  {{- if .Values.cert_manager_issuer }}
  tls:
    - hosts:
        - {{ .Values.domain }}
      secretName: server-tls
  {{- end }}
  rules:
    - host: {{ .Values.domain }}
      http:

@@ -33,3 +33,75 @@ spec:
      name: www
      port: 80
      targetPort: 80
---
apiVersion: v1
kind: Service
metadata:
  name: speckle-preview-service-metrics
  namespace: {{ .Values.namespace }}
  labels:
    app: speckle-preview-service-metrics
    project: speckle-server
spec:
  selector:
    app: speckle-preview-service
    project: speckle-server
  ports:
    - protocol: TCP
      name: web
      port: 9094
      targetPort: 9094
---
apiVersion: v1
kind: Service
metadata:
  name: speckle-fileimport-service-metrics
  namespace: {{ .Values.namespace }}
  labels:
    app: speckle-fileimport-service-metrics
    project: speckle-server
spec:
  selector:
    app: speckle-fileimport-service
    project: speckle-server
  ports:
    - protocol: TCP
      name: web
      port: 9093
      targetPort: 9093
---
apiVersion: v1
kind: Service
metadata:
  name: speckle-webhook-service-metrics
  namespace: {{ .Values.namespace }}
  labels:
    app: speckle-webhook-service
    project: speckle-server
spec:
  selector:
    app: speckle-webhook-service
    project: speckle-server
  ports:
    - protocol: TCP
      name: web
      port: 9095
      targetPort: 9095
---
apiVersion: v1
kind: Service
metadata:
  name: speckle-monitoring-metrics
  namespace: {{ .Values.namespace }}
  labels:
    app: speckle-monitoring
    project: speckle-server
spec:
  selector:
    app: speckle-monitoring
    project: speckle-server
  ports:
    - protocol: TCP
      name: web
      port: 9092
      targetPort: 9092

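To spot-check that one of the new metrics Services resolves to a live endpoint without configuring Prometheus scraping, a port-forward sketch (namespace as above):

kubectl -n speckle-test port-forward svc/speckle-monitoring-metrics 9092:9092 &
curl -s http://localhost:9092/metrics | grep speckle_db_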
@@ -1,6 +1,7 @@
namespace: speckle-test

domain: localhost
ssl_canonical_url: true

docker_image_tag: v2.3.3

@@ -1,3 +1,27 @@
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority
value: 100
globalDefault: false
description: 'High priority (100) for business-critical services'
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: medium-priority
value: 50
globalDefault: true
description: 'Medium priority (50) - dev/test services'
---
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: low-priority
value: -100
globalDefault: false
description: 'Low priority (-100) - Non-critical microservices'
---
apiVersion: v1
kind: Namespace
metadata:
@@ -163,6 +187,42 @@ spec:
      port: 9001
      targetPort: 9001
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: grafana
spec:
  ingressClassName: nginx
  rules:
    - host: grafana.minikube
      http:
        paths:
          - pathType: Prefix
            path: '/'
            backend:
              service:
                name: kube-prometheus-stack-grafana
                port:
                  number: 80
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: prometheus
spec:
  ingressClassName: nginx
  rules:
    - host: prometheus.minikube
      http:
        paths:
          - pathType: Prefix
            path: '/'
            backend:
              service:
                name: prometheus-operated
                port:
                  number: 9090
---
apiVersion: v1
kind: Secret
metadata:

@@ -1,6 +1,7 @@
namespace: speckle-test

domain: myspeckleserver
domain: speckle.minikube
ssl_canonical_url: false

docker_image_tag: 'local'

@@ -13,4 +14,5 @@ s3:
  access_key: 'minioadmin'
  create_bucket: 'true'

cert_manager_issuer: letsencrypt-staging
cert_manager_issuer: ~
enable_prometheus_monitoring: true

@@ -0,0 +1,8 @@
FROM python:3.8-slim

RUN pip install psycopg2-binary prometheus-client

COPY utils/monitor-deployment/src /app
WORKDIR /app

CMD ["python", "-u", "run.py"]
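The image can also be built and run outside the cluster; a sketch, executed from the repository root, where the image tag mirrors the Makefile above, the connection string mirrors the run-locally script below, and the docker run flags are only an assumed setup for a Postgres reachable on the host network:

docker build -t speckle/speckle-monitor-deployment:local -f utils/monitor-deployment/Dockerfile .
docker run --rm --network host \
  -e PG_CONNECTION_STRING=postgres://speckle:speckle@localhost/speckle \
  speckle/speckle-monitor-deployment:local
# the exporter then serves http://localhost:9092/metrics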
@@ -0,0 +1,4 @@
#!/bin/bash

export PG_CONNECTION_STRING=postgres://speckle:speckle@localhost/speckle
cd src && python3 -u run.py
@@ -0,0 +1,105 @@
#!/usr/bin/env python
import os

import psycopg2
from prometheus_client import start_http_server, Gauge
import time
import logging
LOG = logging.getLogger(__name__)
PG_CONNECTION_STRING = os.environ['PG_CONNECTION_STRING']

PROM = {
    'db_size': Gauge('speckle_db_size', 'Size of the entire database (in bytes)'),
    'objects': Gauge('speckle_db_objects', 'Number of objects'),
    'streams': Gauge('speckle_db_streams', 'Number of streams'),
    'commits': Gauge('speckle_db_commits', 'Number of commits'),
    'users': Gauge('speckle_db_users', 'Number of users'),
    'fileimports': Gauge('speckle_db_fileimports', 'Number of imported files, by type and status', labelnames=('filetype','status')),
    'webhooks': Gauge('speckle_db_webhooks', 'Number of webhook calls, by status', labelnames=('status',)),
    'previews': Gauge('speckle_db_previews', 'Number of previews, by status', labelnames=('status',)),
    'filesize': Gauge('speckle_db_filesize', 'Size of imported files, by type (in bytes)', labelnames=('filetype',)),
}


def tick(cur):
    # Total DB size
    cur.execute('SELECT pg_database_size(%s)', (cur.connection.info.dbname,))
    PROM['db_size'].set(cur.fetchone()[0])

    # Counts for users, streams, commits, objects
    cur.execute("SELECT count(*) FROM objects;")
    PROM['objects'].set(cur.fetchone()[0])
    cur.execute("SELECT count(*) FROM streams;")
    PROM['streams'].set(cur.fetchone()[0])
    cur.execute("SELECT count(*) FROM commits;")
    PROM['commits'].set(cur.fetchone()[0])
    cur.execute("SELECT count(*) FROM users;")
    PROM['users'].set(cur.fetchone()[0])

    # File Imports
    cur.execute(
        '''
        SELECT "fileType", "convertedStatus", count(*)
        FROM file_uploads
        GROUP BY ("fileType", "convertedStatus")
        '''
    )
    for row in cur:
        PROM['fileimports'].labels(row[0], str(row[1])).set(row[2])

    cur.execute(
        '''
        SELECT "fileType", SUM("fileSize")
        FROM file_uploads
        GROUP BY "fileType"
        '''
    )
    for row in cur:
        PROM['filesize'].labels(row[0]).set(row[1])

    # Webhooks
    cur.execute(
        '''
        SELECT status, count(*)
        FROM webhooks_events
        GROUP BY status
        '''
    )
    for row in cur:
        PROM['webhooks'].labels(str(row[0])).set(row[1])

    # Previews
    cur.execute(
        '''
        SELECT "previewStatus", count(*)
        FROM object_preview
        GROUP BY "previewStatus"
        '''
    )
    for row in cur:
        PROM['previews'].labels(str(row[0])).set(row[1])

def main():
    start_http_server(9092)

    while True:
        conn = None
        cur = None
        try:
            conn = psycopg2.connect(PG_CONNECTION_STRING)
            cur = conn.cursor()
            tick(cur)
        except Exception as ex:
            LOG.error("Error: %s", str(ex))
        finally:
            if cur:
                cur.close()
            if conn:
                conn.close()

        time.sleep(60)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    main()
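With the exporter running, the gauges declared in PROM are served on port 9092 in the standard Prometheus text format; the values below are purely illustrative and depend on the database contents:

curl -s http://localhost:9092/metrics | grep '^speckle_db_'
# speckle_db_size 1.2345678e+07
# speckle_db_streams 42.0
# speckle_db_fileimports{filetype="ifc",status="2"} 10.0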