# prometheus-collector/AddonTerraformTemplate/main.tf
#
# 632 lines
# 26 KiB
# HCL

# Resource group referenced by the other resources in this template. The name
# is fixed; only the region comes from an input variable.
resource "azurerm_resource_group" "rg" {
  name     = "defaultPrometheusOnboardingResourceGroup"
  location = var.resource_group_location
}
# AKS cluster placed in the template's resource group and region.
# The monitor_metrics block supplies the label and annotation allowlists
# from input variables.
resource "azurerm_kubernetes_cluster" "k8s" {
  name                = var.cluster_name
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
  dns_prefix          = var.dns_prefix

  # The cluster uses a system-assigned managed identity.
  identity {
    type = "SystemAssigned"
  }

  # Single node pool; only the node count is configurable.
  default_node_pool {
    name       = "agentpool"
    node_count = var.agent_count
    vm_size    = "Standard_D2_v2"
  }

  network_profile {
    load_balancer_sku = "standard"
    network_plugin    = "kubenet"
  }

  monitor_metrics {
    labels_allowed      = var.metric_labels_allowlist
    annotations_allowed = var.metric_annotations_allowlist
  }

  tags = {
    Environment = "Development"
  }
}
# Azure Monitor workspace, created in the same resource group and region as
# the cluster. It is the destination of the data collection rule and one of
# the scopes of every recording-rule group in this file.
resource "azurerm_monitor_workspace" "amw" {
  name                = var.monitor_workspace_name
  location            = azurerm_resource_group.rg.location
  resource_group_name = azurerm_resource_group.rg.name
}
# Data collection endpoint in the resource group's region.
# The name is "MSProm-<location>-<cluster name>", cut to at most 44 characters.
resource "azurerm_monitor_data_collection_endpoint" "dce" {
  name = substr(
    "MSProm-${azurerm_resource_group.rg.location}-${var.cluster_name}",
    0,
    min(44, length("MSProm-${azurerm_resource_group.rg.location}-${var.cluster_name}"))
  )

  kind                = "Linux"
  location            = azurerm_resource_group.rg.location
  resource_group_name = azurerm_resource_group.rg.name
}
# Logic to determine region mismatch.
#
# Azure regions may be written either as a short name ("eastus") or as a
# display name ("East US"). Both inputs are normalised (spaces removed,
# lower-cased) before they are compared, so two spellings of the same region
# are not reported as a mismatch.
#
# NOTE(review): the comparison is made on the cluster_region / amw_region input
# variables, while the cluster and the monitor workspace declared in this file
# both take their location from azurerm_resource_group.rg — confirm the
# variables are kept in sync with the real resource locations.
locals {
  dce_region_mismatch = lower(replace(var.cluster_region, " ", "")) != lower(replace(var.amw_region, " ", ""))
}
# Additional data collection endpoint, created only when the regions differ
# AND the cluster is private. It is placed in var.cluster_region.
# The name is "MSProm-PL-<resource group location>-<cluster name>", cut to at
# most 44 characters.
resource "azurerm_monitor_data_collection_endpoint" "dce_mismatch" {
  count = (local.dce_region_mismatch && var.is_private_cluster) ? 1 : 0

  name = substr(
    "MSProm-PL-${azurerm_resource_group.rg.location}-${var.cluster_name}",
    0,
    min(44, length("MSProm-PL-${azurerm_resource_group.rg.location}-${var.cluster_name}"))
  )

  kind                = "Linux"
  location            = var.cluster_region
  resource_group_name = azurerm_resource_group.rg.name
}
# Data collection rule that routes the "Microsoft-PrometheusMetrics" stream
# from the Prometheus forwarder data source to the Azure Monitor workspace.
# The name is "MSProm-<location>-<cluster name>", cut to at most 64 characters.
resource "azurerm_monitor_data_collection_rule" "dcr" {
  name = substr(
    "MSProm-${azurerm_resource_group.rg.location}-${var.cluster_name}",
    0,
    min(64, length("MSProm-${azurerm_resource_group.rg.location}-${var.cluster_name}"))
  )

  description                 = "DCR for Azure Monitor Metrics Profile (Managed Prometheus)"
  kind                        = "Linux"
  location                    = azurerm_resource_group.rg.location
  resource_group_name         = azurerm_resource_group.rg.name
  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.dce.id

  # Source of the metrics stream.
  data_sources {
    prometheus_forwarder {
      name    = "PrometheusDataSource"
      streams = ["Microsoft-PrometheusMetrics"]
    }
  }

  # Destination: the monitor workspace, referred to below as "MonitoringAccount1".
  destinations {
    monitor_account {
      name               = "MonitoringAccount1"
      monitor_account_id = azurerm_monitor_workspace.amw.id
    }
  }

  # Wire the stream to the destination declared above.
  data_flow {
    destinations = ["MonitoringAccount1"]
    streams      = ["Microsoft-PrometheusMetrics"]
  }

  depends_on = [
    azurerm_monitor_data_collection_endpoint.dce
  ]
}
# Associates the data collection rule with the AKS cluster.
resource "azurerm_monitor_data_collection_rule_association" "dcra" {
  name        = "MSProm-${azurerm_resource_group.rg.location}-${var.cluster_name}"
  description = "Association of data collection rule. Deleting this association will break the data collection for this AKS Cluster."

  data_collection_rule_id = azurerm_monitor_data_collection_rule.dcr.id
  target_resource_id      = azurerm_kubernetes_cluster.k8s.id

  depends_on = [
    azurerm_monitor_data_collection_rule.dcr
  ]
}
# Associates the extra (region-mismatch) data collection endpoint with the AKS
# cluster. Created under exactly the same condition as the dce_mismatch
# endpoint, so dce_mismatch[0] always exists whenever this resource does.
#
# The endpoint is referenced directly: because count already requires
# local.dce_region_mismatch to be true, a conditional falling back to the
# primary endpoint could never take its false branch.
#
# NOTE(review): if private clusters whose regions match are also meant to get
# an endpoint association, the count condition (not this reference) is what
# needs to change — confirm the intended behavior.
resource "azurerm_monitor_data_collection_rule_association" "dcra_mismatch" {
  count = (local.dce_region_mismatch && var.is_private_cluster) ? 1 : 0

  target_resource_id          = azurerm_kubernetes_cluster.k8s.id
  data_collection_endpoint_id = azurerm_monitor_data_collection_endpoint.dce_mismatch[0].id
  description                 = "Association of data collection endpoint for private link clusters. Deleting this association will break the data collection for this AKS Cluster."

  # Depend on the endpoint that is actually associated.
  depends_on = [
    azurerm_monitor_data_collection_endpoint.dce_mismatch
  ]
}
# Managed Grafana instance integrated with the Azure Monitor workspace.
# Its region comes from var.grafana_location rather than from the resource
# group's location.
resource "azurerm_dashboard_grafana" "grafana" {
  name                  = var.grafana_name
  location              = var.grafana_location
  resource_group_name   = azurerm_resource_group.rg.name
  grafana_major_version = var.grafana_version

  azure_monitor_workspace_integrations {
    resource_id = azurerm_monitor_workspace.amw.id
  }

  # The principal of this identity is granted a role on the workspace by the
  # datareaderrole role assignment.
  identity {
    type = "SystemAssigned"
  }
}
# Grants Grafana's system-assigned identity a role on the monitor workspace.
# The subscription ID is taken from the third path segment of the workspace's
# resource ID (index 2 after splitting on "/").
# NOTE(review): the role GUID is presumably the built-in "Monitoring Data
# Reader" role — verify against the Azure built-in roles list.
resource "azurerm_role_assignment" "datareaderrole" {
  principal_id       = azurerm_dashboard_grafana.grafana.identity[0].principal_id
  role_definition_id = "/subscriptions/${split("/", azurerm_monitor_workspace.amw.id)[2]}/providers/Microsoft.Authorization/roleDefinitions/b0d8363b-8ddd-447d-831f-62ca05bff136"
  scope              = azurerm_monitor_workspace.amw.id
}
# Recording rules over node metrics (series selected with job="node"),
# evaluated every minute and scoped to the monitor workspace and the AKS
# cluster.
#
# Each expression is an indented heredoc (<<-): the common leading whitespace
# is stripped, so the resulting string is the single PromQL line plus a
# trailing newline.
resource "azurerm_monitor_alert_prometheus_rule_group" "node_recording_rules_rule_group" {
  name                = "NodeRecordingRulesRuleGroup-${var.cluster_name}"
  description         = "Node Recording Rules Rule Group"
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
  cluster_name        = var.cluster_name
  scopes              = [azurerm_monitor_workspace.amw.id, azurerm_kubernetes_cluster.k8s.id]
  interval            = "PT1M"
  rule_group_enabled  = true

  # --- CPU ---
  rule {
    record     = "instance:node_num_cpu:sum"
    enabled    = true
    expression = <<-EOT
    count without (cpu, mode) ( node_cpu_seconds_total{job="node",mode="idle"})
    EOT
  }

  rule {
    record     = "instance:node_cpu_utilisation:rate5m"
    enabled    = true
    expression = <<-EOT
    1 - avg without (cpu) ( sum without (mode) (rate(node_cpu_seconds_total{job="node", mode=~"idle|iowait|steal"}[5m])))
    EOT
  }

  # Uses the instance:node_num_cpu:sum series recorded above.
  rule {
    record     = "instance:node_load1_per_cpu:ratio"
    enabled    = true
    expression = <<-EOT
    ( node_load1{job="node"}/ instance:node_num_cpu:sum{job="node"})
    EOT
  }

  # --- Memory ---
  rule {
    record     = "instance:node_memory_utilisation:ratio"
    enabled    = true
    expression = <<-EOT
    1 - ( ( node_memory_MemAvailable_bytes{job="node"} or ( node_memory_Buffers_bytes{job="node"} + node_memory_Cached_bytes{job="node"} + node_memory_MemFree_bytes{job="node"} + node_memory_Slab_bytes{job="node"} ) )/ node_memory_MemTotal_bytes{job="node"})
    EOT
  }

  rule {
    record     = "instance:node_vmstat_pgmajfault:rate5m"
    enabled    = true
    expression = <<-EOT
    rate(node_vmstat_pgmajfault{job="node"}[5m])
    EOT
  }

  # --- Disk ---
  rule {
    record     = "instance_device:node_disk_io_time_seconds:rate5m"
    enabled    = true
    expression = <<-EOT
    rate(node_disk_io_time_seconds_total{job="node", device!=""}[5m])
    EOT
  }

  rule {
    record     = "instance_device:node_disk_io_time_weighted_seconds:rate5m"
    enabled    = true
    expression = <<-EOT
    rate(node_disk_io_time_weighted_seconds_total{job="node", device!=""}[5m])
    EOT
  }

  # --- Network (device "lo" excluded) ---
  rule {
    record     = "instance:node_network_receive_bytes_excluding_lo:rate5m"
    enabled    = true
    expression = <<-EOT
    sum without (device) ( rate(node_network_receive_bytes_total{job="node", device!="lo"}[5m]))
    EOT
  }

  rule {
    record     = "instance:node_network_transmit_bytes_excluding_lo:rate5m"
    enabled    = true
    expression = <<-EOT
    sum without (device) ( rate(node_network_transmit_bytes_total{job="node", device!="lo"}[5m]))
    EOT
  }

  rule {
    record     = "instance:node_network_receive_drop_excluding_lo:rate5m"
    enabled    = true
    expression = <<-EOT
    sum without (device) ( rate(node_network_receive_drop_total{job="node", device!="lo"}[5m]))
    EOT
  }

  rule {
    record     = "instance:node_network_transmit_drop_excluding_lo:rate5m"
    enabled    = true
    expression = <<-EOT
    sum without (device) ( rate(node_network_transmit_drop_total{job="node", device!="lo"}[5m]))
    EOT
  }
}
# Recording rules over container (job="cadvisor"), kube-state-metrics and node
# series, evaluated every minute and scoped to the monitor workspace and the
# AKS cluster.
#
# Each expression is an indented heredoc (<<-): the common leading whitespace
# is stripped, so the resulting string is the single PromQL line plus a
# trailing newline. "$1" inside label_replace() is a literal regex group
# reference, not a Terraform interpolation.
resource "azurerm_monitor_alert_prometheus_rule_group" "kubernetes_recording_rules_rule_group" {
  name                = "KubernetesRecordingRulesRuleGroup-${var.cluster_name}"
  description         = "Kubernetes Recording Rules Rule Group"
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
  cluster_name        = var.cluster_name
  scopes              = [azurerm_monitor_workspace.amw.id, azurerm_kubernetes_cluster.k8s.id]
  interval            = "PT1M"
  rule_group_enabled  = true

  # --- Container CPU / memory, joined with kube_pod_info to add the node label ---
  rule {
    record     = "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate"
    enabled    = true
    expression = <<-EOT
    sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job="cadvisor", image!=""}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""}))
    EOT
  }

  rule {
    record     = "node_namespace_pod_container:container_memory_working_set_bytes"
    enabled    = true
    expression = <<-EOT
    container_memory_working_set_bytes{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))
    EOT
  }

  rule {
    record     = "node_namespace_pod_container:container_memory_rss"
    enabled    = true
    expression = <<-EOT
    container_memory_rss{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))
    EOT
  }

  rule {
    record     = "node_namespace_pod_container:container_memory_cache"
    enabled    = true
    expression = <<-EOT
    container_memory_cache{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))
    EOT
  }

  rule {
    record     = "node_namespace_pod_container:container_memory_swap"
    enabled    = true
    expression = <<-EOT
    container_memory_swap{job="cadvisor", image!=""}* on (namespace, pod) group_left(node) topk by(namespace, pod) (1, max by(namespace, pod, node) (kube_pod_info{node!=""}))
    EOT
  }

  # --- Resource requests, restricted to pods in phase Pending or Running ---
  rule {
    record     = "cluster:namespace:pod_memory:active:kube_pod_container_resource_requests"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1))
    EOT
  }

  rule {
    record     = "namespace_memory:kube_pod_container_resource_requests:sum"
    enabled    = true
    expression = <<-EOT
    sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ))
    EOT
  }

  rule {
    record     = "cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1))
    EOT
  }

  rule {
    record     = "namespace_cpu:kube_pod_container_resource_requests:sum"
    enabled    = true
    expression = <<-EOT
    sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ))
    EOT
  }

  # --- Resource limits, restricted to pods in phase Pending or Running ---
  rule {
    record     = "cluster:namespace:pod_memory:active:kube_pod_container_resource_limits"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1))
    EOT
  }

  rule {
    record     = "namespace_memory:kube_pod_container_resource_limits:sum"
    enabled    = true
    expression = <<-EOT
    sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ))
    EOT
  }

  rule {
    record     = "cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)group_left() max by (namespace, pod, cluster) ( (kube_pod_status_phase{phase=~"Pending|Running"} == 1) )
    EOT
  }

  rule {
    record     = "namespace_cpu:kube_pod_container_resource_limits:sum"
    enabled    = true
    expression = <<-EOT
    sum by (namespace, cluster) ( sum by (namespace, pod, cluster) ( max by (namespace, pod, container, cluster) ( kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) ( kube_pod_status_phase{phase=~"Pending|Running"} == 1 ) ))
    EOT
  }

  # --- Pod -> workload mapping: one rule per owner kind, all written to the
  # same record name and told apart by the workload_type label ---
  rule {
    record     = "namespace_workload_pod:kube_pod_owner:relabel"
    enabled    = true
    expression = <<-EOT
    max by (cluster, namespace, workload, pod) ( label_replace( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"}, "replicaset", "$1", "owner_name", "(.*)" ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) ( 1, max by (replicaset, namespace, owner_name) ( kube_replicaset_owner{job="kube-state-metrics"} ) ), "workload", "$1", "owner_name", "(.*)" ))
    EOT
    labels = {
      workload_type = "deployment"
    }
  }

  rule {
    record     = "namespace_workload_pod:kube_pod_owner:relabel"
    enabled    = true
    expression = <<-EOT
    max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"}, "workload", "$1", "owner_name", "(.*)" ))
    EOT
    labels = {
      workload_type = "daemonset"
    }
  }

  rule {
    record     = "namespace_workload_pod:kube_pod_owner:relabel"
    enabled    = true
    expression = <<-EOT
    max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"}, "workload", "$1", "owner_name", "(.*)" ))
    EOT
    labels = {
      workload_type = "statefulset"
    }
  }

  rule {
    record     = "namespace_workload_pod:kube_pod_owner:relabel"
    enabled    = true
    expression = <<-EOT
    max by (cluster, namespace, workload, pod) ( label_replace( kube_pod_owner{job="kube-state-metrics", owner_kind="Job"}, "workload", "$1", "owner_name", "(.*)" ))
    EOT
    labels = {
      workload_type = "job"
    }
  }

  # --- Cluster-level node aggregates ---
  rule {
    record     = ":node_memory_MemAvailable_bytes:sum"
    enabled    = true
    expression = <<-EOT
    sum( node_memory_MemAvailable_bytes{job="node"} or ( node_memory_Buffers_bytes{job="node"} + node_memory_Cached_bytes{job="node"} + node_memory_MemFree_bytes{job="node"} + node_memory_Slab_bytes{job="node"} )) by (cluster)
    EOT
  }

  rule {
    record     = "cluster:node_cpu:ratio_rate5m"
    enabled    = true
    expression = <<-EOT
    sum(rate(node_cpu_seconds_total{job="node",mode!="idle",mode!="iowait",mode!="steal"}[5m])) by (cluster) /count(sum(node_cpu_seconds_total{job="node"}) by (cluster, instance, cpu)) by (cluster)
    EOT
  }
}
# Recording rules over Windows node and container series (job="windows-exporter"),
# partly joined with kube-state-metrics series. Evaluated every minute and
# scoped to the monitor workspace and the AKS cluster.
#
# Each expression is an indented heredoc (<<-): the common leading whitespace
# is stripped, so the resulting string is the single PromQL line plus a
# trailing newline.
resource "azurerm_monitor_alert_prometheus_rule_group" "node_and_kubernetes_recording_rules_rule_group_win" {
  name                = "NodeAndKubernetesRecordingRulesRuleGroup-Win-${var.cluster_name}"
  description         = "Node and Kubernetes Recording Rules Rule Group for Windows Nodes"
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
  cluster_name        = var.cluster_name
  scopes              = [azurerm_monitor_workspace.amw.id, azurerm_kubernetes_cluster.k8s.id]
  interval            = "PT1M"
  rule_group_enabled  = true

  # --- Logical disk usage / availability per instance and volume ---
  rule {
    record     = "node:windows_node_filesystem_usage:"
    enabled    = true
    expression = <<-EOT
    max by (instance,volume)((windows_logical_disk_size_bytes{job="windows-exporter"} - windows_logical_disk_free_bytes{job="windows-exporter"}) / windows_logical_disk_size_bytes{job="windows-exporter"})
    EOT
  }

  rule {
    record     = "node:windows_node_filesystem_avail:"
    enabled    = true
    expression = <<-EOT
    max by (instance, volume) (windows_logical_disk_free_bytes{job="windows-exporter"} / windows_logical_disk_size_bytes{job="windows-exporter"})
    EOT
  }

  # --- Network throughput and discarded packets (overall and per instance) ---
  rule {
    record     = ":windows_node_net_utilisation:sum_irate"
    enabled    = true
    expression = <<-EOT
    sum(irate(windows_net_bytes_total{job="windows-exporter"}[5m]))
    EOT
  }

  rule {
    record     = "node:windows_node_net_utilisation:sum_irate"
    enabled    = true
    expression = <<-EOT
    sum by (instance) ((irate(windows_net_bytes_total{job="windows-exporter"}[5m])))
    EOT
  }

  rule {
    record     = ":windows_node_net_saturation:sum_irate"
    enabled    = true
    expression = <<-EOT
    sum(irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[5m])) + sum(irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[5m]))
    EOT
  }

  rule {
    record     = "node:windows_node_net_saturation:sum_irate"
    enabled    = true
    expression = <<-EOT
    sum by (instance) ((irate(windows_net_packets_received_discarded_total{job="windows-exporter"}[5m]) + irate(windows_net_packets_outbound_discarded_total{job="windows-exporter"}[5m])))
    EOT
  }

  # --- Container series joined on container_id with kube_pod_container_info
  # to add the container, pod and namespace labels ---
  rule {
    record     = "windows_pod_container_available"
    enabled    = true
    expression = <<-EOT
    windows_container_available{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  rule {
    record     = "windows_container_total_runtime"
    enabled    = true
    expression = <<-EOT
    windows_container_cpu_usage_seconds_total{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  rule {
    record     = "windows_container_memory_usage"
    enabled    = true
    expression = <<-EOT
    windows_container_memory_usage_commit_bytes{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  rule {
    record     = "windows_container_private_working_set_usage"
    enabled    = true
    expression = <<-EOT
    windows_container_memory_usage_private_working_set_bytes{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  rule {
    record     = "windows_container_network_received_bytes_total"
    enabled    = true
    expression = <<-EOT
    windows_container_network_receive_bytes_total{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  rule {
    record     = "windows_container_network_transmitted_bytes_total"
    enabled    = true
    expression = <<-EOT
    windows_container_network_transmit_bytes_total{job="windows-exporter", container_id != ""} * on(container_id) group_left(container, pod, namespace) max(kube_pod_container_info{job="kube-state-metrics", container_id != ""}) by(container, container_id, pod, namespace)
    EOT
  }

  # --- Requests / limits multiplied by windows_pod_container_available
  # (recorded above), matched on container, pod and namespace ---
  rule {
    record     = "kube_pod_windows_container_resource_memory_request"
    enabled    = true
    expression = <<-EOT
    max by (namespace, pod, container) (kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"}) * on(container,pod,namespace) (windows_pod_container_available)
    EOT
  }

  rule {
    record     = "kube_pod_windows_container_resource_memory_limit"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on(container,pod,namespace) (windows_pod_container_available)
    EOT
  }

  rule {
    record     = "kube_pod_windows_container_resource_cpu_cores_request"
    enabled    = true
    expression = <<-EOT
    max by (namespace, pod, container) ( kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}) * on(container,pod,namespace) (windows_pod_container_available)
    EOT
  }

  rule {
    record     = "kube_pod_windows_container_resource_cpu_cores_limit"
    enabled    = true
    expression = <<-EOT
    kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on(container,pod,namespace) (windows_pod_container_available)
    EOT
  }

  # Uses the windows_container_total_runtime series recorded above.
  rule {
    record     = "namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate"
    enabled    = true
    expression = <<-EOT
    sum by (namespace, pod, container) (rate(windows_container_total_runtime{}[5m]))
    EOT
  }
}
# Recording rules over Windows node series (job="windows-exporter"): node
# count, CPU, memory and logical-disk aggregates. Evaluated every minute and
# scoped to the monitor workspace and the AKS cluster.
#
# The description names this group specifically; it previously repeated the
# text of the "Node and Kubernetes ... Windows" group, which made the two
# groups indistinguishable by description.
#
# Each expression is an indented heredoc (<<-): the common leading whitespace
# is stripped, so the resulting string is the single PromQL line plus a
# trailing newline.
resource "azurerm_monitor_alert_prometheus_rule_group" "node_recording_rules_rule_group_win" {
  name                = "NodeRecordingRulesRuleGroup-Win-${var.cluster_name}"
  description         = "Node Recording Rules Rule Group for Windows Nodes"
  resource_group_name = azurerm_resource_group.rg.name
  location            = azurerm_resource_group.rg.location
  cluster_name        = var.cluster_name
  scopes              = [azurerm_monitor_workspace.amw.id, azurerm_kubernetes_cluster.k8s.id]
  interval            = "PT1M"
  rule_group_enabled  = true

  # --- Node and CPU counts ---
  rule {
    record     = "node:windows_node:sum"
    enabled    = true
    expression = <<-EOT
    count (windows_system_system_up_time{job="windows-exporter"})
    EOT
  }

  rule {
    record     = "node:windows_node_num_cpu:sum"
    enabled    = true
    expression = <<-EOT
    count by (instance) (sum by (instance, core) (windows_cpu_time_total{job="windows-exporter"}))
    EOT
  }

  # --- CPU utilisation (overall and per instance) ---
  rule {
    record     = ":windows_node_cpu_utilisation:avg5m"
    enabled    = true
    expression = <<-EOT
    1 - avg(rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[5m]))
    EOT
  }

  rule {
    record     = "node:windows_node_cpu_utilisation:avg5m"
    enabled    = true
    expression = <<-EOT
    1 - avg by (instance) (rate(windows_cpu_time_total{job="windows-exporter",mode="idle"}[5m]))
    EOT
  }

  # --- Memory ---
  rule {
    record     = ":windows_node_memory_utilisation:"
    enabled    = true
    expression = <<-EOT
    1 -sum(windows_memory_available_bytes{job="windows-exporter"})/sum(windows_os_visible_memory_bytes{job="windows-exporter"})
    EOT
  }

  rule {
    record     = ":windows_node_memory_MemFreeCached_bytes:sum"
    enabled    = true
    expression = <<-EOT
    sum(windows_memory_available_bytes{job="windows-exporter"} + windows_memory_cache_bytes{job="windows-exporter"})
    EOT
  }

  rule {
    record     = "node:windows_node_memory_totalCached_bytes:sum"
    enabled    = true
    expression = <<-EOT
    (windows_memory_cache_bytes{job="windows-exporter"} + windows_memory_modified_page_list_bytes{job="windows-exporter"} + windows_memory_standby_cache_core_bytes{job="windows-exporter"} + windows_memory_standby_cache_normal_priority_bytes{job="windows-exporter"} + windows_memory_standby_cache_reserve_bytes{job="windows-exporter"})
    EOT
  }

  rule {
    record     = ":windows_node_memory_MemTotal_bytes:sum"
    enabled    = true
    expression = <<-EOT
    sum(windows_os_visible_memory_bytes{job="windows-exporter"})
    EOT
  }

  rule {
    record     = "node:windows_node_memory_bytes_available:sum"
    enabled    = true
    expression = <<-EOT
    sum by (instance) ((windows_memory_available_bytes{job="windows-exporter"}))
    EOT
  }

  rule {
    record     = "node:windows_node_memory_bytes_total:sum"
    enabled    = true
    expression = <<-EOT
    sum by (instance) (windows_os_visible_memory_bytes{job="windows-exporter"})
    EOT
  }

  # The next two rules use the per-instance available/total series recorded above.
  rule {
    record     = "node:windows_node_memory_utilisation:ratio"
    enabled    = true
    expression = <<-EOT
    (node:windows_node_memory_bytes_total:sum - node:windows_node_memory_bytes_available:sum) / scalar(sum(node:windows_node_memory_bytes_total:sum))
    EOT
  }

  rule {
    record     = "node:windows_node_memory_utilisation:"
    enabled    = true
    expression = <<-EOT
    1 - (node:windows_node_memory_bytes_available:sum / node:windows_node_memory_bytes_total:sum)
    EOT
  }

  rule {
    record     = "node:windows_node_memory_swap_io_pages:irate"
    enabled    = true
    expression = <<-EOT
    irate(windows_memory_swap_page_operations_total{job="windows-exporter"}[5m])
    EOT
  }

  # --- Logical disk read + write time (overall and per instance) ---
  rule {
    record     = ":windows_node_disk_utilisation:avg_irate"
    enabled    = true
    expression = <<-EOT
    avg(irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[5m]) + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[5m]))
    EOT
  }

  rule {
    record     = "node:windows_node_disk_utilisation:avg_irate"
    enabled    = true
    expression = <<-EOT
    avg by (instance) ((irate(windows_logical_disk_read_seconds_total{job="windows-exporter"}[5m]) + irate(windows_logical_disk_write_seconds_total{job="windows-exporter"}[5m])))
    EOT
  }
}