2019-04-11 22:38:53 +03:00
|
|
|
# Service account for azure-npm. It is the subject of the azure-npm-binding
# ClusterRoleBinding and is set as serviceAccountName on the azure-npm
# DaemonSet pods in this manifest.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: azure-npm
  namespace: kube-system
  labels:
    addonmanager.kubernetes.io/mode: EnsureExists
|
|
|
|
---
|
2021-05-05 09:51:45 +03:00
|
|
|
# Read-only permissions for azure-npm: get/list/watch on pods, nodes and
# namespaces (core API group) and on networkpolicies (networking.k8s.io).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  # ClusterRole is a cluster-scoped kind, so metadata.namespace is not set.
  name: azure-npm
  labels:
    addonmanager.kubernetes.io/mode: EnsureExists
rules:
  - apiGroups:
      - ""  # the empty string selects the core API group
    resources:
      - pods
      - nodes
      - namespaces
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - networking.k8s.io
    resources:
      - networkpolicies
    verbs:
      - get
      - list
      - watch
|
|
|
|
---
|
2021-05-05 09:51:45 +03:00
|
|
|
# Grants the azure-npm ClusterRole to the azure-npm ServiceAccount in
# kube-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  # ClusterRoleBinding is a cluster-scoped kind, so metadata.namespace is not
  # set. The namespace on the subject below is still required.
  name: azure-npm-binding
  labels:
    addonmanager.kubernetes.io/mode: EnsureExists
subjects:
  - kind: ServiceAccount
    name: azure-npm
    namespace: kube-system
roleRef:
  kind: ClusterRole
  name: azure-npm
  apiGroup: rbac.authorization.k8s.io
|
|
|
|
---
|
Prometheus metrics (#590)
* prometheus additions to testmain (commented out right now)
* home of the npm prometheus metrics and tools for updating them, testing them
* add/remove policy metrics
* add/remove iptables rule metric measurements
* add/remove ipset metric measurements
* testing for gauges. want to soon remove the boolean for including prometheus in unit testing
* run http server that exposes prometheus from main
* cleaner test additions with less code
* removed incorrect instance of AddSet in the TestDeleteSet test
* added prometheus annotations to pod templates
* deleted unused file
* much more organized initialization of metrics now. now includes map from metric to metric name
* add ability to get summary count value. now getting gauge values and this new count value are done by passing the metric itself as a param instead of a string
* condenses prometheus testing code base by condensing all prometheus error messages into a function
* added testing for summary counts, condensed prometheus error handling code, and updated calls to use new form for getting metric values
* update based on variable spelling change in metrics package
* Added comments for functions and moved http handler code to the http file
* fixed problem of registering same metric name for different metrics, and passing in the wrong param type for testing
* made prometheus testing folder with interactive testing file. moved old random metric flux testing function over from ipsm_test
* moved testing around again
* fixed spelling mistake
* counting mistake in unit test
* handler variable was in wrong file. Changed stdout printing to logging
* fixed parameter errors and counting error in a test
* moved utilities for testing prometheus metrics to npm/util. Updated StartHTTP to have an additional parameter for waiting after starting the server
* updated uses of StartHTTP to have the extra parameter
* updated GetValue and GetCountValue uses to use the prometheus features of the util package, which is now moved to a promutil package within npm/metrics/
* removed unnecessary comments, removed print statement, and added quantiles to all summary metrics
* fixed problem of double registering metrics
* wait longer for http server to start
* moved tool in test-util.go to promutil/util.go
* fixed timer to be in milliseconds and updated metric descriptions to mention units
* removed unnecessary comments
* http server always started in a go routine now. Added comment justifying the use of an http server
* debugging http connection refused in pipeline
* fixed syntax error
* removed debugging wrapper around http service
* sleep so that the testing metrics endpoint can be pinged
* redesigned GetValue and GetCountValue so that they don't use http calls
* removed random but helpful testing file - will write about quick testing in a wiki page
* milliseconds were being truncated. now they have decimals
* use direct Prometheus metric commands instead of wrapping them
* removed code used when testing was done through http server. Moved registering to metric creation functions
* added createGaugeVec, updated comments, made all help strings constants
* added metric that counts number of entries in each ipset. still need to add tests
* fixed creation of GaugeVecs, and use explicit labeling instead of order-based labeling now
* updated GetVecValue method signature
* added set to metrics on creation and wrote unit tests for CreateSet, AddToSet, DeleteFromSet, DeleteSet
* use custom registry to limit content that Container Insights scrapes. Also log the start of http server
* wrote TODO item comments for Restore and Destroy (currently these functions are only used in testing)
* NPM won't crash if a Prometheus metric fails to register now (unlikely). Added logging for metric registration/creation, and explicit public function to initialize metrics so that we can finish log config first
* initialize metrics in unit tests
* renamed util.go to test-util.go
Co-authored-by: Hunter Gregory <t-hugreg@microsoft.com>
2020-07-15 02:41:02 +03:00
|
|
|
# Runs one azure-npm pod per Linux node (see nodeSelector) on the host network.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: azure-npm
  namespace: kube-system
  labels:
    app: azure-npm
    addonmanager.kubernetes.io/mode: EnsureExists
spec:
  selector:
    matchLabels:
      # Must match the pod template labels below; the metrics Service in this
      # manifest selects on the same label.
      k8s-app: azure-npm
  template:
    metadata:
      labels:
        k8s-app: azure-npm
      annotations:
        # NOTE(review): legacy critical-pod annotation kept as-is;
        # priorityClassName below is set as well.
        scheduler.alpha.kubernetes.io/critical-pod: ""
        azure.npm/scrapeable: ""
    spec:
      priorityClassName: system-node-critical
      tolerations:
        # The first two entries have no key, so they tolerate every taint with
        # the given effect.
        - operator: "Exists"
          effect: NoExecute
        - operator: "Exists"
          effect: NoSchedule
        - key: CriticalAddonsOnly
          operator: Exists
      containers:
        - name: azure-npm
          image: mcr.microsoft.com/containernetworking/azure-npm:v1.4.45.3
          resources:
            limits:
              cpu: 250m
              memory: 300Mi
            requests:
              # Only a CPU request is given; no memory request is declared.
              cpu: 250m
          securityContext:
            # Not privileged: only NET_ADMIN is added, and the root filesystem
            # is read-only (writable paths come from the volume mounts below).
            privileged: false
            capabilities:
              add:
                - NET_ADMIN
            readOnlyRootFilesystem: true
          env:
            # HOSTNAME is populated with the name of the node the pod runs on.
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
            # Path of the JSON file provided by the azure-npm-config mount.
            - name: NPM_CONFIG
              value: /etc/azure-npm/azure-npm.json
          volumeMounts:
            - name: log
              mountPath: /var/log
            - name: xtables-lock
              mountPath: /run/xtables.lock
            - name: protocols
              mountPath: /etc/protocols
            - name: azure-npm-config
              mountPath: /etc/azure-npm
            - name: tmp
              mountPath: /tmp
      hostNetwork: true
      # hostUsers: false is deliberately not set. Kubernetes does not allow a
      # pod to combine hostUsers: false with hostNetwork: true, and this pod
      # uses the host network.
      nodeSelector:
        kubernetes.io/os: linux
      volumes:
        - name: log
          hostPath:
            path: /var/log
            type: Directory
        - name: xtables-lock
          hostPath:
            path: /run/xtables.lock
            type: File
        - name: protocols
          hostPath:
            path: /etc/protocols
            type: File
        - name: azure-npm-config
          configMap:
            name: azure-npm-config
        # Scratch space backing /tmp, since the root filesystem is read-only.
        - name: tmp
          emptyDir: {}
      serviceAccountName: azure-npm
|
2020-08-05 18:21:29 +03:00
|
|
|
---
|
|
|
|
# Exposes the azure-npm pods (selected by k8s-app: azure-npm) on service port
# 9000, forwarding to container port 10091. 10091 is the ListeningPort value
# in the azure-npm-config ConfigMap of this manifest.
apiVersion: v1
kind: Service
metadata:
  name: npm-metrics-cluster-service
  namespace: kube-system
  labels:
    app: npm-metrics
spec:
  selector:
    k8s-app: azure-npm
  ports:
    - port: 9000
      targetPort: 10091
|
2021-09-01 19:03:24 +03:00
|
|
|
---
|
|
|
|
# Runtime configuration for azure-npm. The DaemonSet in this manifest mounts
# this ConfigMap at /etc/azure-npm and points NPM_CONFIG at
# /etc/azure-npm/azure-npm.json.
apiVersion: v1
kind: ConfigMap
metadata:
  name: azure-npm-config
  namespace: kube-system
data:
  # The value is a literal JSON document, so it cannot carry comments.
  # NOTE(review): the key "NetPolInvervalInMilliseconds" is kept exactly as
  # written; confirm the spelling the consumer expects before changing it.
  azure-npm.json: |
    {
        "ResyncPeriodInMinutes": 15,
        "ListeningPort": 10091,
        "ListeningAddress": "0.0.0.0",
        "ApplyIntervalInMilliseconds": 500,
        "ApplyMaxBatches": 100,
        "MaxBatchedACLsPerPod": 30,
        "NetPolInvervalInMilliseconds": 500,
        "MaxPendingNetPols": 100,
        "Toggles": {
            "EnablePrometheusMetrics": true,
            "EnablePprof": true,
            "EnableHTTPDebugAPI": true,
            "EnableV2NPM": true,
            "PlaceAzureChainFirst": false,
            "ApplyInBackground": true,
            "NetPolInBackground": true
        }
    }
|