add all
This commit is contained in:
Родитель
4a2e3c1fb5
Коммит
0c4887ed24
|
@ -0,0 +1 @@
|
|||
.terragrunt-cache
|
|
@ -0,0 +1,10 @@
|
|||
FROM python:alpine
|
||||
|
||||
WORKDIR /app
|
||||
COPY ./app .
|
||||
|
||||
RUN pip install -r requirements.txt
|
||||
|
||||
EXPOSE 8000
|
||||
|
||||
CMD [ "python", "/app/rules-exporter.py"]
|
|
@ -0,0 +1,28 @@
|
|||
# Rules exporter
|
||||
|
||||
Prometheus's alerting system does not return anything for an alert that is not firing. This exporter fetches the rules configured on the [inventory website](https://inventory.internal.unity3d.com/alerts/rules) and exposes a new metric, in Prometheus format, that lists every rule together with its state (up or firing).
|
||||
|
||||
|
||||
## Rules
|
||||
|
||||
We use labels to select rules from the list. To be considered, a rule must have the following labels:
|
||||
|
||||
* **type**: cloud_health
|
||||
* **service**: any value; it is used in the global dashboard to group health status by service
|
||||
|
||||
## Configuration
|
||||
|
||||
| Environment variable | Default Value | Required |
|
||||
| --------------------- | --------------- | ---------- |
|
||||
| INVENTORY_URL | None | yes |
| PROMETHEUS_URL | None | yes |
|
||||
| INVENTORY_TOKEN | None | yes |
|
||||
| INVENTORY_ENV | None | yes |
|
||||
| EXPORTER_PORT | 8000 | no |
|
||||
| LOG_LEVEL | info | no |
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
Log in on the [inventory website](https://inventory.internal.unity3d.com) and go to the [rules list](https://inventory.internal.unity3d.com/alerts/rules). If you inspect the page with your browser's developer tools, the network tab shows the bearer token sent with the API calls.
|
||||
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
click
|
||||
prometheus_client
|
||||
requests
|
||||
|
|
@ -0,0 +1,268 @@
|
|||
import logging
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
import click
|
||||
import requests
|
||||
|
||||
from prometheus_client import start_http_server, Gauge
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
|
||||
|
||||
class RulesExporter():
    '''
    The RulesExporter collects rules and alerts to create a prometheus metric page.

    Rules come from the inventory api and firing alerts from the prometheus
    api. Rules labelled ``type=cloud_health`` that also carry a ``service``
    label are published as samples of the ``CLOUD_ALERTS`` gauge.

    Args:
        url_inventory (str): unity inventory url
        url_promehteus (str): prometheus base url
        token (str): bearer token to authenticate in the inventory api
        env (str): environment; ``test`` is sent to the inventory as ``int``
        port (int): listen port of http server
        log_level (str): log level (debug, info, error)
        inventory_api_batch_size (int): size of batch in paginated api call
    '''
    __slots__ = [
        'url_inventory',
        'url_promehteus',
        'token',
        'env',
        'log',
        'port',
        'inventory_api_batch_size',
        'gauge'
    ]

    # (connect, read) timeout in seconds applied to every outgoing request so
    # that a stalled server cannot block the refresh loop forever.
    REQUEST_TIMEOUT = (10, 60)

    def __init__(self, url_inventory, url_promehteus, token, env, port=8000, log_level='info', inventory_api_batch_size=500):
        self._init_logging(log_level=log_level)
        self.log.info('Initialisation of the rules exporter')

        # Every mandatory parameter is checked in the original order; the
        # first missing one is logged and stops the process with status 128.
        required = (
            (url_inventory, 'Inventory url not found'),
            (url_promehteus, 'Prometheus url not found'),
            (token, 'Parameters token not found'),
            (env, 'Parameters env not found'),
        )
        for value, message in required:
            if not value:
                self.log.critical(message)
                sys.exit(128)

        self.inventory_api_batch_size = inventory_api_batch_size
        self.url_inventory = url_inventory
        self.url_promehteus = url_promehteus

        # The inventory api is queried with "int" where the caller says "test";
        # _generate_metric maps it back when publishing.
        self.env = 'int' if env == 'test' else env

        self.token = token
        self.port = int(port)
        self.gauge = Gauge(
            'CLOUD_ALERTS',
            'List of rules',
            ['env', 'alertname', 'service', 'type', 'alertstate'],
        )

    def start_http_server(self):
        '''
        Start the http server to publish prometheus metric page on the root.

        This call never returns: after starting the server it refreshes the
        metrics, sleeps 60 seconds and starts over. An exception raised by a
        refresh is not caught here and ends the loop.
        '''
        self.log.info(f'Starting the web server on port {self.port}')
        start_http_server(self.port)
        while True:
            self._process_request()
            time.sleep(60)

    def _init_logging(self, log_level='info'):
        '''
        Initialize the logging service.

        Args:
            log_level (str): log level (debug, info, error)
        '''
        level = self._get_log_level(log_level)
        logging.basicConfig(
            format='%(asctime)s-%(levelname)s: %(message)s',
            level=level)
        self.log = logging.getLogger()
        self.log.debug('Log level to debug')

    def _get_log_level(self, log_level):
        '''
        Get the log level in string and return the log level enum value.

        The name is matched case-insensitively and surrounding whitespace is
        ignored, so ``DEBUG`` and ``debug`` are equivalent. Unknown values
        fall back to ``logging.INFO``.

        Args:
            log_level (str): log level (debug, info, error)

        Return:
            int: log level
        '''
        level = {
            'debug': logging.DEBUG,
            'info': logging.INFO,
            'error': logging.ERROR
        }
        return level.get(str(log_level).strip().lower(), logging.INFO)

    def _process_request(self):
        '''
        Get all data and generate the metrics page.
        '''
        self.log.info(f'Get rules on {self.url_inventory}')
        rules = self._get_rules()
        alerts = self._get_alertes_triggered()
        self._generate_metric(self._filter_rules(rules), alerts)

    def _filter_rules(self, rules):
        '''
        Get list of rules and return rules filtered by labels (type=cloud_health && service).

        A rule whose ``labels`` entry is missing or empty is skipped instead
        of raising.

        Args:
            rules (list): List of rules from the inventory api

        Return:
            list: List of rules filtered
        '''
        selected = []
        for rule in rules:
            labels = rule.get('labels') or {}
            if labels.get('type') == 'cloud_health' and 'service' in labels:
                selected.append(rule)
        return selected

    def _get_rules(self):
        '''
        Get rules on the inventory api.

        Return:
            list: List of rules from the inventory api
        '''
        return self._get_request_paginated(
            self.url_inventory + '/api/inventory/alert-rules')

    def _get_alertes_triggered(self):
        '''
        Get alerts firing on the prometheus api.

        Return:
            list: List of all alerts from prometheus
        '''
        payload = {
            'dedup': 'true',
            'query': 'ALERTS{alertstate="firing"}'
        }

        response = self._get_request(
            self.url_promehteus + '/api/v1/query', payload)

        return response['data']['result']

    def _get_request_paginated(self, url):
        '''
        Make GET request with pagination.

        Pages are requested until the page number reaches
        ``ceil(total / inventory_api_batch_size)``, where ``total`` is read
        from each response.

        Args:
            url (str): full api url to reach

        Return:
            list: all items concatenated in one list
        '''
        page = 1
        result = []

        while True:
            self.log.debug(f'Get page {page} by batch of {url}')
            payload = {
                'pageSize': self.inventory_api_batch_size,
                'page': page,
                'env': self.env
            }

            data = self._get_request(url, payload)
            result.extend(data['items'])
            if page >= math.ceil(data['total'] / self.inventory_api_batch_size):
                break
            page += 1
        return result

    def _get_request(self, url, payload):
        '''
        Make a GET request to an api with a payload.

        The request carries the bearer token, is retried on 502/503/504
        responses and is bounded by ``REQUEST_TIMEOUT``. The session is closed
        once the response has been decoded.

        Args:
            url (str): api url to reach
            payload (dict): payload to add in the request

        Return:
            dict: Json of the result

        Raises:
            SystemExit: with status 1 when the api answers 401
            requests.HTTPError: for any other error status
        '''
        header = {'Authorization': f'Bearer {self.token}'}

        retries = Retry(total=100, backoff_factor=5,
                        status_forcelist=[502, 503, 504])

        # The context manager releases the pooled connections; previously a
        # new session was created on every call and never closed.
        with requests.Session() as session:
            adapter = HTTPAdapter(max_retries=retries)
            # Apply the same retry policy whichever scheme the url uses.
            session.mount('https://', adapter)
            session.mount('http://', adapter)

            response = session.get(url=url, headers=header, params=payload,
                                   timeout=self.REQUEST_TIMEOUT)

            if response.status_code == 401:
                self.log.error(
                    f'Authentification failed with token on {url}.')
                sys.exit(1)

            response.raise_for_status()

            return response.json()

    def _generate_metric(self, rules, alerts):
        '''
        Generate prometheus metrics collections with the rules and the alerts.

        All previous samples are dropped, then one sample is set per rule and
        environment: its value is the number of alerts whose ``ruleId`` label
        equals the rule ``id`` and its ``alertstate`` label is ``firing`` when
        that number is positive, ``up`` otherwise. ``alerts`` is not modified.

        Args:
            rules (list): list of all rules
            alerts (list): list of all alerts
        '''
        # Count the alerts per rule id once instead of scanning the whole list
        # for every rule; alerts without a ruleId label cannot match any rule.
        firing = {}
        for alert in alerts:
            rule_id = alert.get('metric', {}).get('ruleId')
            if rule_id is not None:
                firing[rule_id] = firing.get(rule_id, 0) + 1

        self.gauge.clear()
        for rule in rules:
            labels = rule['labels']
            for env in rule['env']:
                if env == "int":
                    env = "test"
                # NOTE(review): pop() keeps the historical behaviour where the
                # alerts of a rule are counted for its first environment only;
                # confirm whether every environment should report them.
                alerts_count = firing.pop(rule['id'], 0)
                state = 'firing' if alerts_count else 'up'
                self.gauge.labels(env, rule['alert'], labels.get('service'),
                                  labels.get('type'), state).set(alerts_count)
|
||||
|
||||
|
||||
# Command line entry point: every option can also be supplied through the
# environment variable named in its help text.
@click.command()
@click.option(
    '--url-inventory', '-u',
    envvar='INVENTORY_URL',
    required=True,
    help='Base url to reach the inventory. env var INVENTORY_URL',
)
@click.option(
    '--url-promehteus', '-m',
    envvar='PROMETHEUS_URL',
    required=True,
    help='Base url to reach the the prometheus. env var PROMETHEUS_URL',
)
@click.option(
    '--token', '-t',
    envvar='INVENTORY_TOKEN',
    required=True,
    help='Bear token to get authentificate to the inventory. env var INVENTORY_TOKEN',
)
@click.option(
    '--env', '-e',
    envvar='INVENTORY_ENV',
    required=True,
    help='Environment for rules. env var INVENTORY_ENV',
)
@click.option(
    '--listen-port', '-p',
    envvar='EXPORTER_PORT',
    default=8000,
    help='Port where the prometheus page will be publish (default=8000) or env var EXPORTER_PORT',
)
@click.option(
    '--log-level', '-l',
    envvar='LOG_LEVEL',
    default='info',
    help='Log level can be: debug, info, error (default=info) or env var LOG_LEVEL',
)
def rules_exporter(url_inventory, url_promehteus, env, token, listen_port, log_level):
    # Build the exporter from the parsed options, then hand control to its
    # serving loop. No docstring on purpose: click would print it in --help.
    RulesExporter(
        url_inventory=url_inventory,
        url_promehteus=url_promehteus,
        token=token,
        env=env,
        port=listen_port,
        log_level=log_level,
    ).start_http_server()


# Run the command only when the file is executed as a script.
if __name__ == "__main__":
    rules_exporter()
|
|
@ -0,0 +1,22 @@
|
|||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
|
@ -0,0 +1,5 @@
|
|||
apiVersion: v1
|
||||
appVersion: "1.0"
|
||||
description: A Helm chart to deploy the rules exporter from the inventory
|
||||
name: rules-exporter
|
||||
version: 0.1.0
|
|
@ -0,0 +1,32 @@
|
|||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "rules-exporter.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "rules-exporter.fullname" -}}
|
||||
{{- if .Values.fullnameOverride -}}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride -}}
|
||||
{{- if contains $name .Release.Name -}}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "rules-exporter.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
|
@ -0,0 +1,64 @@
|
|||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "rules-exporter.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "rules-exporter.name" . }}
|
||||
helm.sh/chart: {{ include "rules-exporter.chart" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
app.kubernetes.io/name: {{ include "rules-exporter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "rules-exporter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
spec:
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
env:
|
||||
- name: INVENTORY_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: prometheus-rules-exporter-secrets
|
||||
key: inventory-token
|
||||
- name: INVENTORY_URL
|
||||
value: {{ .Values.urlInventory }}
|
||||
- name: INVENTORY_ENV
|
||||
value: {{ .Values.env }}
|
||||
- name: PROMETHEUS_URL
|
||||
value: {{ .Values.urlPrometheus }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8000
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /
|
||||
port: http
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
|
@ -0,0 +1,23 @@
|
|||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "rules-exporter.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/path: "/"
|
||||
labels:
|
||||
app.kubernetes.io/name: {{ include "rules-exporter.name" . }}
|
||||
helm.sh/chart: {{ include "rules-exporter.chart" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
app.kubernetes.io/name: {{ include "rules-exporter.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
|
@ -0,0 +1,2 @@
|
|||
urlPrometheus: https://prometheus.prd.mon.corp.unity3d.com
|
||||
env: prd
|
|
@ -0,0 +1,2 @@
|
|||
urlPrometheus: https://prometheus.stg.mon.corp.unity3d.com
|
||||
env: stg
|
|
@ -0,0 +1,2 @@
|
|||
urlPrometheus: https://prometheus.test.mon.corp.unity3d.com
|
||||
env: test
|
|
@ -0,0 +1,34 @@
|
|||
# Default values for rules-exporter.
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
replicaCount: 1
|
||||
|
||||
image:
|
||||
repository: us.gcr.io/unity-cs-devops-gcr-prd/prometheus-rules-exporter
|
||||
tag: "latest"
|
||||
pullPolicy: Always
|
||||
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8000
|
||||
|
||||
urlInventory: https://inventory.internal.unity3d.com
|
||||
urlPrometheus:
|
||||
|
||||
resources: {}
|
||||
|
||||
nodeSelector:
|
||||
product: devops
|
||||
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
key: product
|
||||
operator: Equal
|
||||
value: devops
|
||||
|
||||
affinity: {}
|
||||
|
||||
env: {}
|
|
@ -0,0 +1,3 @@
|
|||
terraform {
|
||||
backend "gcs" {}
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
data "vault_generic_secret" "secrets" {
|
||||
path = "${var.vault_base_path}/${var.env}/prometheus-rules-exporter/inventory-token"
|
||||
}
|
||||
|
||||
resource "kubernetes_secret" "config" {
|
||||
metadata {
|
||||
name = "prometheus-rules-exporter-secrets"
|
||||
namespace = var.namespace
|
||||
}
|
||||
|
||||
data = data.vault_generic_secret.secrets.data
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
provider "kubernetes" {
|
||||
}
|
||||
|
||||
provider "vault" {
|
||||
address = "https://vault.corp.unity3d.com"
|
||||
}
|
||||
|
||||
provider "google" {
|
||||
project = var.project
|
||||
region = var.region
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
variable "project" {
|
||||
type = string
|
||||
description = "Project ID to use."
|
||||
}
|
||||
|
||||
variable "env" {
|
||||
type = string
|
||||
description = "The environment where we are deploying."
|
||||
}
|
||||
|
||||
variable "vault_base_path" {
|
||||
type = string
|
||||
description = "The base path of the vault secret."
|
||||
default = "secret/common-devops"
|
||||
}
|
||||
|
||||
variable "namespace" {
|
||||
type = string
|
||||
description = "Kubernetes namespace where we will deploy the container"
|
||||
default = "devops-monitoring"
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
project = "unity-cs-common-prd"
|
||||
env = "prd"
|
|
@ -0,0 +1,2 @@
|
|||
project = "unity-cs-common-stg"
|
||||
env = "stg"
|
|
@ -0,0 +1,7 @@
|
|||
terraform {
|
||||
source = "${get_parent_terragrunt_dir()}/../modules/${path_relative_to_include()}"
|
||||
}
|
||||
|
||||
include {
|
||||
path = find_in_parent_folders()
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
project = "unity-cs-common-test"
|
||||
env = "test"
|
|
@ -0,0 +1,29 @@
|
|||
remote_state {
|
||||
backend = "gcs"
|
||||
disable_init = tobool(get_env("TG_DISABLE_INIT", "false"))
|
||||
|
||||
config = {
|
||||
bucket = "${get_env("TF_VAR_project", "unity-cs-common-test")}-terraform-tg"
|
||||
prefix = "${get_env("TF_VAR_region", "us-central1")}/${path_relative_to_include()}"
|
||||
project = "${get_env("TF_VAR_project", "unity-cs-common-test")}"
|
||||
location = "us"
|
||||
gcs_bucket_labels = {
|
||||
business_unit = "cloudservices"
|
||||
role = "terraform_state"
|
||||
product = "devops_monitoring"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
terraform {
|
||||
|
||||
extra_arguments "common_var" {
|
||||
commands = get_terraform_commands_that_need_vars()
|
||||
|
||||
optional_var_files = [
|
||||
"${get_terragrunt_dir()}/default.tfvars",
|
||||
"${get_terragrunt_dir()}/${get_env("TF_VAR_env", "test")}.tfvars",
|
||||
"${get_terragrunt_dir()}/${get_env("TF_VAR_region", "us-central1")}-${get_env("TF_VAR_env", "test")}.tfvars",
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1,18 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// home answers every request with the fixed body "Home" followed by a newline.
// The request itself is not inspected.
func home(w http.ResponseWriter, _ *http.Request) {
	fmt.Fprintln(w, "Home")
}
|
||||
|
||||
func main() {
|
||||
|
||||
http.HandleFunc("/", home)
|
||||
|
||||
http.ListenAndServe(":8090", nil)
|
||||
}
|
Загрузка…
Ссылка в новой задаче