From 8750b346edcc41d00da32f2f4fe4d13a43de9ae4 Mon Sep 17 00:00:00 2001 From: Evan Baker Date: Wed, 11 May 2022 12:34:47 -0500 Subject: [PATCH] CNS Prometheus and Grafana examples (#1366) * cns prometheus examples Signed-off-by: Evan Baker * grafana samples Signed-off-by: Evan Baker --- cns/doc/examples/metrics/README.md | 66 ++ cns/doc/examples/metrics/grafana.json | 1092 +++++++++++++++++++ cns/doc/examples/metrics/podMonitor.yaml | 14 + cns/doc/examples/metrics/scrape_config.yaml | 76 ++ 4 files changed, 1248 insertions(+) create mode 100644 cns/doc/examples/metrics/README.md create mode 100644 cns/doc/examples/metrics/grafana.json create mode 100644 cns/doc/examples/metrics/podMonitor.yaml create mode 100644 cns/doc/examples/metrics/scrape_config.yaml diff --git a/cns/doc/examples/metrics/README.md b/cns/doc/examples/metrics/README.md new file mode 100644 index 000000000..bd51aa7a0 --- /dev/null +++ b/cns/doc/examples/metrics/README.md @@ -0,0 +1,66 @@ +# Azure CNS metrics +azure-cns exposes metrics via Prometheus on `:10092/metrics` + +## Scraping +Prometheus can be configured using these examples: +- a [podMonitor](podMonitor.yaml), if using prometheus-operator or kube-prometheus +- manually via this equivalent [scrape_config](scrape_config.yaml) + +## Monitoring +To view all available CNS metrics once Prometheus is correctly configured to scrape: +```promql +count ({job="kube-system/azure-cns"}) by (__name__) +``` + +CNS exposes standard Go and Prom metrics such as `go_goroutines`, `go_gc*`, `up`, and more. 
+ +Metrics designed to be customer-facing are generally prefixed with `cx_` and can be listed similarly: +```promql +count ({__name__=~"cx.*",job="kube-system/azure-cns"}) by (__name__) +``` +At the time of writing, the following cx metrics are exposed (key metrics in **bold**): +- **cx_ipam_available_ips** (IPs reserved by the Node but not assigned to Pods yet) +- cx_ipam_batch_size +- cx_ipam_current_available_ips +- cx_ipam_expect_available_ips +- **cx_ipam_max_ips** (maximum IPs the Node can reserve from the Subnet) +- cx_ipam_pending_programming_ips +- cx_ipam_pending_release_ips +- **cx_ipam_pod_allocated_ips** (IPs assigned to Pods on the Node) +- cx_ipam_requested_ips +- **cx_ipam_total_ips** (IPs reserved by the Node from the Subnet) + +These metrics may be used to gain insight into the current state of the cluster's IPAM. + +For example, to view the current IP count requested by each node: +```promql +sum (cx_ipam_requested_ips{job="kube-system/azure-cns"}) by (instance) +``` +To view the current IP count allocated to each node: +```promql +sum (cx_ipam_total_ips{job="kube-system/azure-cns"}) by (instance) +``` +> Note: if these two values aren't converging after some time, that indicates an IP provisioning error. + +To view the current IP count assigned to pods, per node: +```promql +sum (cx_ipam_pod_allocated_ips{job="kube-system/azure-cns"}) by (instance) +``` + +## Visualizing +A sample Grafana dashboard is included at [grafana.json](grafana.json). 
+ +Visualizations included are: +- Per Node + - CNS Status (Up/Down) + - Requested IPs + - Reserved IPs + - Used IPs + - Requested/Reserved/Used vs Time +- Per Cluster + - Total Reserved IPs vs Time + - Total Used IPs vs Time + - Reserved and Assigned vs Time + - Cluster Subnet Utilization Percentage vs Time + - Cluster Subnet Utilization Total vs Time + - Node Headroom (how many additional Nodes can be added to the Cluster based on the Subnet capacity) diff --git a/cns/doc/examples/metrics/grafana.json b/cns/doc/examples/metrics/grafana.json new file mode 100644 index 000000000..992c7d847 --- /dev/null +++ b/cns/doc/examples/metrics/grafana.json @@ -0,0 +1,1092 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": [], + "__requires": [ + { + "type": "panel", + "id": "barchart", + "name": "Bar chart", + "version": "" + }, + { + "type": "panel", + "id": "gauge", + "name": "Gauge", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "8.5.1" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "iteration": 1652131389388, + "links": [], + "liveNow": true, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "title": "Per Node 
Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "CNS instance liveness according to /metrics on each instance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [ + { + "options": { + "0": { + "color": "text", + "index": 0, + "text": "DOWN" + }, + "1": { + "color": "text", + "index": 1, + "text": "UP" + } + }, + "type": "value" + } + ], + "max": 1, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-green", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": { + "valueSize": 20 + } + }, + "pluginVersion": "8.5.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "up{job=\"kube-system/azure-cns\", instance=~\"$instance\"}", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "{{instance}}", + "range": false, + "refId": "A" + } + ], + "title": "$instance Status", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 250, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 1 + }, + "id": 51, + "options": 
{ + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.5.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cx_ipam_requested_ips{instance=~\"$instance\"}", + "refId": "A" + } + ], + "title": "$instance Requested IPs", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 250, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 6, + "y": 1 + }, + "id": 35, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.5.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cx_ipam_total_ips{instance=~\"$instance\"}", + "refId": "A" + } + ], + "title": "$instance Reserved IPs", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 250, + "min": 0, + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 9, + "y": 1 + }, + "id": 42, + "options": { + "orientation": "auto", + 
"reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.5.1", + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "cx_ipam_pod_allocated_ips{instance=~\"$instance\"}", + "refId": "A" + } + ], + "title": "$instance Used IPs", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Cluster IP utilization displayed as IPs assigned to Pods vs total IPs allocated to the Node, by Instance.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "right", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 12, + "y": 1 + }, + "id": 4, + "maxPerRow": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "bottom" + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "repeat": "instance", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "cx_ipam_requested_ips{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "instant": 
false, + "interval": "", + "legendFormat": "Requested by CNS", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "cx_ipam_total_ips{instance=~\"$instance\"}", + "hide": false, + "legendFormat": "Allocated to Node", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "cx_ipam_pod_allocated_ips{instance=~\"$instance\"}", + "hide": false, + "legendFormat": "Assigned to Pods", + "range": true, + "refId": "C" + } + ], + "title": "IP utilization", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 16, + "panels": [], + "title": "Cluster Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Total IPs reserved by CNS by Node", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "right", + "axisSoftMin": 0, + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 19, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "normal", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum (cx_ipam_total_ips) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Reserved IPs", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "All IPs in use by Pods by Node", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "right", + "axisSoftMin": 0, + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 18, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "normal", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum (cx_ipam_pod_allocated_ips) by (instance)", + "legendFormat": "{{instance}}", + "range": true, + "refId": "A" + } + ], + "title": "Total Assigned Pod IPs", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "right", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 
0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 14, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(cx_ipam_total_ips)", + "legendFormat": "Reserved IPs", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(cx_ipam_pod_allocated_ips)", + "hide": false, + "legendFormat": "Used IPs", + "range": true, + "refId": "B" + } + ], + "title": "Reserved vs Assigned Pod IPs", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "How much of the Subnet this Cluster is using.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 2, + "displayName": "IPs", + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 34 + }, + "id": 21, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + 
"lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": false, + "text": {} + }, + "pluginVersion": "8.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum (cx_ipam_pod_allocated_ips) / (2^(32-$subnet_mask))", + "hide": false, + "range": true, + "rawQuery": false, + "refId": "A" + } + ], + "title": "Cluster Subnet Utilization %", + "transformations": [], + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "How much of the Subnet this Cluster is using.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisGridShow": true, + "axisLabel": "", + "axisPlacement": "right", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "mappings": [], + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 4, + "y": 34 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "2^(32-$subnet_mask)", + "legendFormat": "IPs available in /$subnet_mask", + "range": true, + "refId": "A" + }, + 
{ + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum(cx_ipam_pod_allocated_ips)", + "hide": false, + "legendFormat": "Used IPs", + "range": true, + "refId": "B" + } + ], + "title": "Cluster Subnet Utilization", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "The most Nodes that could be added to the Cluster given the current Subnet capacity.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 16, + "y": 34 + }, + "id": 63, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "8.5.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "((2^(32-$subnet_mask))-(sum(cx_ipam_total_ips)))/(min(cx_ipam_batch_size))", + "refId": "A" + } + ], + "title": "Node Headroom", + "type": "gauge" + } + ], + "refresh": "5s", + "schemaVersion": 36, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(up{job=\"kube-system/azure-cns\"}, instance)", + "description": "CNS instance to inspect", + "hide": 0, + "includeAll": true, + "label": "Instance", + "multi": true, + "name": "instance", + "options": [], + "query": { + "query": "label_values(up{job=\"kube-system/azure-cns\"}, instance)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 3, + "type": "query" + }, + { + "current": {}, + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(cx_ipam_max_ips{job=\"kube-system/azure-cns\"}, subnet_cidr)", + "hide": 2, + "includeAll": false, + "multi": false, + "name": "subnet_mask", + "options": [], + "query": { + "query": "label_values(cx_ipam_max_ips{job=\"kube-system/azure-cns\"}, subnet_cidr)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*\\/(.+?)$/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "kube-system/azure-cns", + "uid": "mbPa4Al7z", + "version": 32, + "weekStart": "" +} diff --git a/cns/doc/examples/metrics/podMonitor.yaml b/cns/doc/examples/metrics/podMonitor.yaml new file mode 100644 index 000000000..fcc0f4e9f --- /dev/null +++ b/cns/doc/examples/metrics/podMonitor.yaml @@ -0,0 +1,14 @@ +## This example podMonitor config can be used with a Prometheus-Operator +## managed Prometheus to automatically discover and collect azure-cns metrics. +--- +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: azure-cns + namespace: kube-system +spec: + podMetricsEndpoints: + - port: metrics + selector: + matchLabels: + k8s-app: azure-cns diff --git a/cns/doc/examples/metrics/scrape_config.yaml b/cns/doc/examples/metrics/scrape_config.yaml new file mode 100644 index 000000000..bcdf49423 --- /dev/null +++ b/cns/doc/examples/metrics/scrape_config.yaml @@ -0,0 +1,76 @@ +## This example Prometheus scrape-config can be used with a manually +## configured Prometheus to collect azure-cns metrics. 
+- job_name: azure-cns + honor_timestamps: true + scrape_interval: 30s + scrape_timeout: 10s + metrics_path: /metrics + scheme: http + follow_redirects: true + enable_http2: true + relabel_configs: + - source_labels: [job] + separator: ; + regex: (.*) + target_label: __tmp_prometheus_job_name + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_label_k8s_app, __meta_kubernetes_pod_labelpresent_k8s_app] + separator: ; + regex: (azure-cns);true + replacement: $1 + action: keep + - source_labels: [__meta_kubernetes_pod_container_port_name] + separator: ; + regex: metrics + replacement: $1 + action: keep + - source_labels: [__meta_kubernetes_namespace] + separator: ; + regex: (.*) + target_label: namespace + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_container_name] + separator: ; + regex: (.*) + target_label: container + replacement: $1 + action: replace + - source_labels: [__meta_kubernetes_pod_name] + separator: ; + regex: (.*) + target_label: pod + replacement: $1 + action: replace + - separator: ; + regex: (.*) + target_label: job + replacement: kube-system/azure-cns + action: replace + - separator: ; + regex: (.*) + target_label: endpoint + replacement: metrics + action: replace + - source_labels: [__address__] + separator: ; + regex: (.*) + modulus: 1 + target_label: __tmp_hash + replacement: $1 + action: hashmod + - source_labels: [__tmp_hash] + separator: ; + regex: "0" + replacement: $1 + action: keep + kubernetes_sd_configs: + - role: pod + kubeconfig_file: "" + follow_redirects: true + enable_http2: true + namespaces: + own_namespace: false + names: + - kube-system