Onboard Managed Prometheus per stamp (#755)

* add prometheus

* add missing provider

* add missing provider block

* add provider block

* add a dependency

* fix

* adding dcra

* add missing provider blocks

* add missing provider block

* export properties

* rename and move rule

* fix broken reference

* fix

* update

* add monitor_metrics

* update azurerm from 3.34 to 3.37

* change endpoint id

* change dataCollectionRuleId

* rename prometheus az mon workspace

* automate monitoring data reader permissions

* renaming components

* rename state store (#774) (#775)

* align version updates

* bump

* fix

* test workload group

* adding parent back

* adding another rule

* bump docs

* add second rule group

* bugfix

* add diagram

* Update `feature/managedprom` from `main` (#886)

* Component updates 03.03.2023 (#882)

* Bump azure/azapi in /src/testing/loadtest-azure/infra

Bumps [azure/azapi](https://github.com/Azure/terraform-provider-azapi) from 1.3.0 to 1.4.0.
- [Release notes](https://github.com/Azure/terraform-provider-azapi/releases)
- [Changelog](https://github.com/Azure/terraform-provider-azapi/blob/main/CHANGELOG.md)
- [Commits](https://github.com/Azure/terraform-provider-azapi/compare/v1.3.0...v1.4.0)

---
updated-dependencies:
- dependency-name: azure/azapi
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump playwright-chromium

Bumps [playwright-chromium](https://github.com/Microsoft/playwright) from 1.30.0 to 1.31.1.
- [Release notes](https://github.com/Microsoft/playwright/releases)
- [Commits](https://github.com/Microsoft/playwright/compare/v1.30.0...v1.31.1)

---
updated-dependencies:
- dependency-name: playwright-chromium
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/testing/loadtest-azure/infra

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.45.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.45.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Storage.Blobs in /src/app/AlwaysOn.Tests

Bumps [Azure.Storage.Blobs](https://github.com/Azure/azure-sdk-for-net) from 12.14.1 to 12.15.0.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Storage.Blobs_12.14.1...Azure.Storage.Blobs_12.15.0)

---
updated-dependencies:
- dependency-name: Azure.Storage.Blobs
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump node from 19.5.0 to 19.7.0 in /src/app/AlwaysOn.UI

Bumps node from 19.5.0 to 19.7.0.

---
updated-dependencies:
- dependency-name: node
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Data.Tables in /src/app/AlwaysOn.BackgroundProcessor

Bumps [Azure.Data.Tables](https://github.com/Azure/azure-sdk-for-net) from 12.7.1 to 12.8.0.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Data.Tables_12.7.1...Azure.Data.Tables_12.8.0)

---
updated-dependencies:
- dependency-name: Azure.Data.Tables
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.Azure.Cosmos in /src/app/AlwaysOn.Shared

Bumps [Microsoft.Azure.Cosmos](https://github.com/Azure/azure-cosmos-dotnet-v3) from 3.31.2 to 3.32.0.
- [Release notes](https://github.com/Azure/azure-cosmos-dotnet-v3/releases)
- [Changelog](https://github.com/Azure/azure-cosmos-dotnet-v3/blob/master/changelog.md)
- [Commits](https://github.com/Azure/azure-cosmos-dotnet-v3/compare/3.31.2...3.32.0)

---
updated-dependencies:
- dependency-name: Microsoft.Azure.Cosmos
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Identity in /src/app/AlwaysOn.HealthService

Bumps [Azure.Identity](https://github.com/Azure/azure-sdk-for-net) from 1.8.1 to 1.8.2.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Identity_1.8.1...Azure.Identity_1.8.2)

---
updated-dependencies:
- dependency-name: Azure.Identity
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.Extensions.Hosting

Bumps [Microsoft.Extensions.Hosting](https://github.com/dotnet/runtime) from 7.0.0 to 7.0.1.
- [Release notes](https://github.com/dotnet/runtime/releases)
- [Commits](https://github.com/dotnet/runtime/compare/v7.0.0...v7.0.1)

---
updated-dependencies:
- dependency-name: Microsoft.Extensions.Hosting
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/infra/monitoring/grafana/terraform/stamps

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump vue from 3.2.45 to 3.2.47 in /src/app/AlwaysOn.UI

Bumps [vue](https://github.com/vuejs/core) from 3.2.45 to 3.2.47.
- [Release notes](https://github.com/vuejs/core/releases)
- [Changelog](https://github.com/vuejs/core/blob/main/CHANGELOG.md)
- [Commits](https://github.com/vuejs/core/compare/v3.2.45...v3.2.47)

---
updated-dependencies:
- dependency-name: vue
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/testing/userload-generator/infra

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/testing/loadtest-locust/infra

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.Azure.WebJobs.Extensions.DurableTask

Bumps [Microsoft.Azure.WebJobs.Extensions.DurableTask](https://github.com/Azure/azure-functions-durable-extension) from 2.9.0 to 2.9.2.
- [Release notes](https://github.com/Azure/azure-functions-durable-extension/releases)
- [Changelog](https://github.com/Azure/azure-functions-durable-extension/blob/dev/azure-pipelines-release.yml)
- [Commits](https://github.com/Azure/azure-functions-durable-extension/compare/v2.9.0...v2.9.2)

---
updated-dependencies:
- dependency-name: Microsoft.Azure.WebJobs.Extensions.DurableTask
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump actions/checkout from 2 to 3

Bumps [actions/checkout](https://github.com/actions/checkout) from 2 to 3.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v2...v3)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.Extensions.Diagnostics.HealthChecks.Abstractions

Bumps [Microsoft.Extensions.Diagnostics.HealthChecks.Abstractions](https://github.com/dotnet/aspnetcore) from 7.0.2 to 7.0.3.
- [Release notes](https://github.com/dotnet/aspnetcore/releases)
- [Changelog](https://github.com/dotnet/aspnetcore/blob/main/docs/ReleasePlanning.md)
- [Commits](https://github.com/dotnet/aspnetcore/compare/v7.0.2...v7.0.3)

---
updated-dependencies:
- dependency-name: Microsoft.Extensions.Diagnostics.HealthChecks.Abstractions
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.Azure.Cosmos in /src/app/AlwaysOn.CatalogService

Bumps [Microsoft.Azure.Cosmos](https://github.com/Azure/azure-cosmos-dotnet-v3) from 3.32.0 to 3.32.1.
- [Release notes](https://github.com/Azure/azure-cosmos-dotnet-v3/releases)
- [Changelog](https://github.com/Azure/azure-cosmos-dotnet-v3/blob/master/changelog.md)
- [Commits](https://github.com/Azure/azure-cosmos-dotnet-v3/compare/3.32.0...3.32.1)

---
updated-dependencies:
- dependency-name: Microsoft.Azure.Cosmos
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/infra/workload/globalresources

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump playwright-core

Bumps [playwright-core](https://github.com/Microsoft/playwright) from 1.30.0 to 1.31.2.
- [Release notes](https://github.com/Microsoft/playwright/releases)
- [Commits](https://github.com/Microsoft/playwright/compare/v1.30.0...v1.31.2)

---
updated-dependencies:
- dependency-name: playwright-core
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Storage.Blobs in /src/app/AlwaysOn.HealthService

Bumps [Azure.Storage.Blobs](https://github.com/Azure/azure-sdk-for-net) from 12.14.1 to 12.15.0.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Storage.Blobs_12.14.1...Azure.Storage.Blobs_12.15.0)

---
updated-dependencies:
- dependency-name: Azure.Storage.Blobs
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Identity from 1.8.1 to 1.8.2 in /src/app/AlwaysOn.Tests

Bumps [Azure.Identity](https://github.com/Azure/azure-sdk-for-net) from 1.8.1 to 1.8.2.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Identity_1.8.1...Azure.Identity_1.8.2)

---
updated-dependencies:
- dependency-name: Azure.Identity
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump @microsoft/applicationinsights-web in /src/app/AlwaysOn.UI

Bumps [@microsoft/applicationinsights-web](https://github.com/microsoft/ApplicationInsights-JS) from 2.8.9 to 2.8.10.
- [Release notes](https://github.com/microsoft/ApplicationInsights-JS/releases)
- [Changelog](https://github.com/microsoft/ApplicationInsights-JS/blob/master/RELEASES.md)
- [Commits](https://github.com/microsoft/ApplicationInsights-JS/commits)

---
updated-dependencies:
- dependency-name: "@microsoft/applicationinsights-web"
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Azure.Identity in /src/app/AlwaysOn.BackgroundProcessor

Bumps [Azure.Identity](https://github.com/Azure/azure-sdk-for-net) from 1.8.1 to 1.8.2.
- [Release notes](https://github.com/Azure/azure-sdk-for-net/releases)
- [Commits](https://github.com/Azure/azure-sdk-for-net/compare/Azure.Identity_1.8.1...Azure.Identity_1.8.2)

---
updated-dependencies:
- dependency-name: Azure.Identity
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump NUnit3TestAdapter from 4.3.1 to 4.4.2 in /src/app/AlwaysOn.Tests

Bumps [NUnit3TestAdapter](https://github.com/nunit/nunit3-vs-adapter) from 4.3.1 to 4.4.2.
- [Release notes](https://github.com/nunit/nunit3-vs-adapter/releases)
- [Commits](https://github.com/nunit/nunit3-vs-adapter/compare/V4.3.1...V4.4.2)

---
updated-dependencies:
- dependency-name: NUnit3TestAdapter
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump hashicorp/azurerm in /src/infra/workload/releaseunit

Bumps [hashicorp/azurerm](https://github.com/hashicorp/terraform-provider-azurerm) from 3.41.0 to 3.46.0.
- [Release notes](https://github.com/hashicorp/terraform-provider-azurerm/releases)
- [Changelog](https://github.com/hashicorp/terraform-provider-azurerm/blob/main/CHANGELOG.md)
- [Commits](https://github.com/hashicorp/terraform-provider-azurerm/compare/v3.41.0...v3.46.0)

---
updated-dependencies:
- dependency-name: hashicorp/azurerm
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump @playwright/test

Bumps [@playwright/test](https://github.com/Microsoft/playwright) from 1.30.0 to 1.31.2.
- [Release notes](https://github.com/Microsoft/playwright/releases)
- [Commits](https://github.com/Microsoft/playwright/compare/v1.30.0...v1.31.2)

---
updated-dependencies:
- dependency-name: "@playwright/test"
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* Bump Microsoft.NET.Test.Sdk in /src/app/AlwaysOn.Tests

Bumps [Microsoft.NET.Test.Sdk](https://github.com/microsoft/vstest) from 17.4.0 to 17.5.0.
- [Release notes](https://github.com/microsoft/vstest/releases)
- [Changelog](https://github.com/microsoft/vstest/blob/main/docs/releases.md)
- [Commits](https://github.com/microsoft/vstest/compare/v17.4.0...v17.5.0)

---
updated-dependencies:
- dependency-name: Microsoft.NET.Test.Sdk
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

* bump

* Bump grafana/grafana in /src/infra/monitoring/grafana (#842)

Bumps grafana/grafana from 9.3.6 to 9.4.1.

---
updated-dependencies:
- dependency-name: grafana/grafana
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* Bump minimist (#883)

Bumps [minimist](https://github.com/minimistjs/minimist) from 1.2.5 to 1.2.8.
- [Release notes](https://github.com/minimistjs/minimist/releases)
- [Changelog](https://github.com/minimistjs/minimist/blob/main/CHANGELOG.md)
- [Commits](https://github.com/minimistjs/minimist/compare/v1.2.5...v1.2.8)

---
updated-dependencies:
- dependency-name: minimist
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* workload identity note (#885)

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Hansjoerg Scherer <hjscherer@users.noreply.github.com>

* bump tf versions

* bump

* update provider

* add azapi to root module

* update diag settings block

* add azapi

* drop enabled

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Hansjoerg Scherer <hjscherer@users.noreply.github.com>
This commit is contained in:
heoelri 2023-06-21 14:27:15 +02:00 коммит произвёл GitHub
Родитель bb028805e0
Коммит 34ad577333
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
17 изменённых файлов: 263 добавлений и 4 удалений

Двоичные данные
docs/media/architecture_diagram_with_prometheus.png Normal file

Двоичный файл не отображается.

После

Ширина:  |  Высота:  |  Размер: 187 KiB

Просмотреть файл

@ -0,0 +1,43 @@
# Managed Prometheus in Azure Mission-critical
The Azure Mission-Critical reference implementation implements a wide variety of monitoring and observability capabilities and best practices. It uses regional and global Log Analytics workspaces to store logs and metrics, decoupled from the ephemeral nature and lifecycle of its stamps. It contains an instrumented sample application workload using Application Insights for in-depth application metrics and distributed traces, uses a health service to evaluate and report the status of each of its deployed stamps, and uses a self-hosted instance of Grafana to visualize the user and system flows and provide actionable insights into the overall health status of the workload. This is a good start, but might not be detailed enough for production workloads.
To further improve the observability capabilities of a mission-critical workload built on top of the Azure Mission-Critical reference implementation, Prometheus can be used to gain more granular metrics from the infrastructure as well as from the workload.
> **Prometheus** is an open-source monitoring solution that enables you to collect metrics from various systems, applications, and services. It allows you to query and visualize your data, alert on critical conditions, and understand the performance and behavior of your infrastructure. It is built with a focus on scalability, reliability, and simplicity, making it a popular choice for monitoring large and complex systems.
## Why Prometheus?
Azure Mission-Critical follows a cloud-native-first, PaaS-first approach. Whenever possible, PaaS and cloud-native services should be prioritized when designing, developing or modernizing a workload on Azure. These cloud-native services, and several PaaS services, have one thing in common: they provide Prometheus metrics. To call out a few examples: AKS uses Kubernetes, and Kubernetes provides Prometheus metrics on various levels like the control plane and the nodes (kubelet). Services running on it, like cert-manager, Open Service Mesh and nginx (and of course many more), also provide Prometheus metric endpoints.
As the examples above show, Prometheus, Prometheus metrics and especially the Prometheus Query Language (PromQL) are well-established standards in the cloud-native space. The existing Azure Monitor add-on for Azure Kubernetes Service already supported scraping Prometheus metrics from endpoints within a cluster and storing them in Log Analytics. Azure Monitor workspace now also supports PromQL, which makes it easier to use across environments and platforms.
## Managed Prometheus
To use Prometheus, you could already deploy a Prometheus server on your own to scrape the metrics endpoints and provide the data to visualization tools like Grafana. But Prometheus is a beast by itself: you have to take care of the infrastructure, operations, maintenance and so on. This is now easier with the availability of the **Azure Monitor managed service for Prometheus**. This managed service offering is compatible with the open-source Prometheus project, but it abstracts the underlying infrastructure away. It consists of the following components:
- The 'ama-agent' is deployed on Kubernetes to gather the metrics.
- An **Azure Monitor workspace** where the collected data is stored.
- **Data collection rules** to determine what data to collect.
- **Data collection endpoints** define the target to send the data to for analysis and storage.
- **Data collection rule associations** to link rules and resources they apply to.
Once set up, this provides you with a query endpoint that can be used by, for example, Grafana to feed dashboards.
## How to fit Prometheus into Azure Mission-Critical?
You might wonder how these additional resources are deployed and where to put them. The answer is easy: the mission-critical architecture pattern distinguishes between global and regional components as well as between stateful and stateless components.
The Azure Monitor workspace that stores the collected data is deployed into the `<prefix>-monitoring-rg` resource group. One instance is deployed per region in use.
The other resources, like the data collection rule, endpoint and rule association, are deployed alongside the regional stamps. They're ephemeral and share the same lifecycle as the stamp. When a new stamp is deployed, new rules, endpoints and associations are deployed and the old ones are destroyed.
See [Data considerations in Azure Monitor workspace overview](https://learn.microsoft.com/azure/azure-monitor/essentials/azure-monitor-workspace-overview#data-considerations) for more information.
![Architecture diagram with prometheus](../media/architecture_diagram_with_prometheus.png)
## How to access the data?
The data can be visualized and queried via Grafana. The Azure Monitor workspaces, deployed per region, can be added as additional data sources to Grafana. When using the Grafana deployment in Azure Mission-Critical, the App Service instance used to host Grafana can be granted access to the Azure Monitor workspaces via the built-in role "Monitoring Data Reader".
---
[Azure Mission-Critical - Full List of Documentation](/docs/README.md)

Просмотреть файл

@ -12,4 +12,12 @@ resource "azurerm_role_assignment" "loganalyticsreader_role" {
scope = data.azurerm_subscription.current.id
role_definition_name = "Log Analytics Reader"
principal_id = azurerm_linux_web_app.appservice[each.key].identity[0].principal_id
}
# Permission for Grafana to read from all Azure Monitor workspaces in the subscription.
# Assigns the built-in "Monitoring Data Reader" role at subscription scope to the
# identity of each Grafana App Service instance (one assignment per entry in local.stamps).
#
# NOTE: this resource needs its own name. The "Log Analytics Reader" assignment in this
# file is already called "loganalyticsreader_role", and Terraform rejects two resources
# with the same type and name in one module as a duplicate resource configuration.
resource "azurerm_role_assignment" "monitoringdatareader_role" {
  for_each = local.stamps

  scope                = data.azurerm_subscription.current.id
  role_definition_name = "Monitoring Data Reader"
  principal_id         = azurerm_linux_web_app.appservice[each.key].identity[0].principal_id
}

Просмотреть файл

@ -31,13 +31,12 @@ resource "azurerm_monitor_diagnostic_setting" "acr" {
target_resource_id = azurerm_container_registry.main.id
log_analytics_workspace_id = azurerm_log_analytics_workspace.global.id
dynamic "log" {
dynamic "enabled_log" {
iterator = entry
for_each = data.azurerm_monitor_diagnostic_categories.acr.log_category_types
content {
category = entry.value
enabled = true
retention_policy {
enabled = true

Просмотреть файл

@ -4,6 +4,10 @@ terraform {
source = "hashicorp/azurerm"
version = "3.61.0"
}
azapi = {
source = "Azure/azapi"
version = "1.5.0"
}
}
backend "azurerm" {
@ -23,6 +27,8 @@ provider "azurerm" {
}
}
provider "azapi" {}
resource "azurerm_resource_group" "global" {
name = "${local.prefix}-global-rg"
location = local.location

Просмотреть файл

@ -0,0 +1,12 @@
# Provider requirements for this module.
# Both providers are pinned to an exact version (no version operator is used).
terraform {
  required_providers {
    # AzAPI provider, used for resource types addressed directly by ARM type and API version.
    azapi = {
      source  = "Azure/azapi"
      version = "1.5.0"
    }

    # AzureRM provider.
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "3.61.0"
    }
  }
}

Просмотреть файл

@ -1,3 +1,7 @@
output "log_analytics_workspace_id" {
value = azurerm_log_analytics_workspace.stamp.workspace_id
}
}
# Exposes the resource ID of the azapi_resource.prometheus resource
# (type microsoft.monitor/accounts) to callers of this module.
output "azure_monitor_workspace_id" {
  value = azapi_resource.prometheus.id
}

Просмотреть файл

@ -0,0 +1,8 @@
# Monitoring account (ARM type microsoft.monitor/accounts) created through the AzAPI
# provider. It is placed in the resource group passed in via var.resource_group_id and
# named "<prefix>-<location_short>-prometheus".
resource "azapi_resource" "prometheus" {
  type = "microsoft.monitor/accounts@2021-06-03-preview"

  name      = "${local.prefix}-${local.location_short}-prometheus"
  location  = var.location
  parent_id = var.resource_group_id

  # Export every property of the API response so it can be read from this resource's output.
  response_export_values = ["*"]
}

Просмотреть файл

@ -13,6 +13,11 @@ variable "resource_group_name" {
type = string
}
# Full resource ID of the resource group; used as parent_id for AzAPI-managed
# resources, which take a parent ID rather than a resource group name.
variable "resource_group_id" {
  type        = string
  description = "Resource Group Id"
}
variable "azure_monitor_action_group_resource_id" {
description = "Resource ID of a Azure Monitor action group to send alerts to"
type = string

Просмотреть файл

@ -11,6 +11,7 @@ module "stamp_monitoring" {
location = each.value
prefix = local.prefix
resource_group_id = azurerm_resource_group.monitoring.id
resource_group_name = azurerm_resource_group.monitoring.name
azure_monitor_action_group_resource_id = azurerm_monitor_action_group.main.id
alerts_enabled = var.alerts_enabled

Просмотреть файл

@ -4,6 +4,10 @@ terraform {
source = "hashicorp/azurerm"
version = "3.61.0"
}
azapi = {
source = "Azure/azapi"
version = "1.5.0"
}
}
backend "azurerm" {}
@ -19,6 +23,8 @@ provider "azurerm" {
}
}
provider "azapi" {}
# Random API key which needs to be identical between all stamps
resource "random_password" "api_key" {
length = 32

Просмотреть файл

@ -5,6 +5,18 @@ data "azurerm_cosmosdb_account" "global" {
resource_group_name = var.global_resource_group_name
}
# Looks up the existing monitoring resource group by name so that its ID can be
# used as the parent of the Prometheus monitoring account lookup.
data "azurerm_resource_group" "monitoring" {
  name = var.monitoring_resource_group_name
}
# Reads the existing monitoring account (ARM type microsoft.monitor/accounts) named
# "<prefix>-<location_short>-prometheus" from the monitoring resource group.
# Its ID is what the stamp's data collection rule and Prometheus rule groups point at.
data "azapi_resource" "prometheus" {
  type      = "microsoft.monitor/accounts@2021-06-03-preview"
  name      = "${local.prefix}-${local.location_short}-prometheus"
  parent_id = data.azurerm_resource_group.monitoring.id

  # Export every property of the API response.
  response_export_values = ["*"]
}
data "azurerm_container_registry" "global" {
name = var.acr_name
resource_group_name = var.global_resource_group_name

Просмотреть файл

@ -10,6 +10,8 @@ resource "azurerm_kubernetes_cluster" "stamp" {
automatic_channel_upgrade = "node-image"
monitor_metrics {}
oidc_issuer_enabled = true
workload_identity_enabled = true

Просмотреть файл

@ -1,3 +1,16 @@
# Provider requirements for the stamp module.
# Both providers are pinned to an exact version (no version operator is used).
terraform {
  required_providers {
    # AzAPI provider, used by the azapi_resource / data.azapi_resource blocks of this module.
    azapi = {
      source  = "Azure/azapi"
      version = "1.5.0"
    }

    # AzureRM provider.
    azurerm = {
      source  = "hashicorp/azurerm"
      version = "3.61.0"
    }
  }
}
# Azure Resource Group used for all resources (per stamp)
resource "azurerm_resource_group" "stamp" {
name = "${var.prefix}-stamp-${var.location}-rg"

Просмотреть файл

@ -0,0 +1,129 @@
# Data collection rule (DCR) of the stamp.
# Routes the "Microsoft-PrometheusMetrics" stream from the Prometheus forwarder data
# source to the monitoring account looked up in data.azapi_resource.prometheus, using
# the stamp's data collection endpoint.
resource "azapi_resource" "dataCollectionRule" {
  type      = "Microsoft.Insights/dataCollectionRules@2021-09-01-preview"
  name      = "${local.prefix}-${local.location_short}-dcr"
  parent_id = azurerm_resource_group.stamp.id
  location  = azurerm_resource_group.stamp.location

  # Provider-side schema validation of `body` is switched off for this resource.
  schema_validation_enabled = false

  body = jsonencode({
    kind = "Linux"
    properties = {
      dataCollectionEndpointId = azapi_resource.dataCollectionEndpoint.id

      # Stream -> destination routing; the destination name must match the
      # `name` of an entry under `destinations` below.
      dataFlows = [
        {
          destinations = ["MonitoringAccount1"]
          streams      = ["Microsoft-PrometheusMetrics"]
        }
      ]

      dataSources = {
        prometheusForwarder = [
          {
            name               = "PrometheusDataSource"
            streams            = ["Microsoft-PrometheusMetrics"]
            labelIncludeFilter = {}
          }
        ]
      }

      destinations = {
        monitoringAccounts = [
          {
            accountResourceId = data.azapi_resource.prometheus.id
            name              = "MonitoringAccount1"
          }
        ]
      }
    }
  })
}
# Data collection endpoint (DCE) of the stamp, created in the stamp resource group.
# It is referenced by the data collection rule through `dataCollectionEndpointId`.
resource "azapi_resource" "dataCollectionEndpoint" {
  type      = "Microsoft.Insights/dataCollectionEndpoints@2021-09-01-preview"
  name      = "${local.prefix}-${local.location_short}-dce"
  parent_id = azurerm_resource_group.stamp.id
  location  = azurerm_resource_group.stamp.location

  # No endpoint properties are set explicitly; only the kind is specified.
  body = jsonencode({
    kind       = "Linux"
    properties = {}
  })
}
# Data collection rule association (DCRA): links the stamp's data collection rule to
# the stamp's AKS cluster. The association is created as a child of the cluster
# (parent_id is the cluster ID), so no location is set on it.
resource "azapi_resource" "dataCollectionRuleAssociation" {
  type      = "Microsoft.Insights/dataCollectionRuleAssociations@2021-09-01-preview"
  name      = "${local.prefix}-${local.location_short}-dcra"
  parent_id = azurerm_kubernetes_cluster.stamp.id
  # location is intentionally left unset (it was commented out in the original):
  #location = azurerm_resource_group.stamp.location

  # Provider-side schema validation of `body` is switched off for this resource.
  schema_validation_enabled = false

  body = jsonencode({
    scope = azurerm_kubernetes_cluster.stamp.id
    properties = {
      dataCollectionRuleId = azapi_resource.dataCollectionRule.id
    }
  })
}
# Prometheus rule group with two recording rules, scoped to the monitoring account
# from data.azapi_resource.prometheus and tied to the stamp's AKS cluster by name.
# The group is evaluated at an interval of PT1M (ISO 8601 duration: one minute).
resource "azapi_resource" "prometheusK8sRuleGroup" {
  type      = "Microsoft.AlertsManagement/prometheusRuleGroups@2021-07-22-preview"
  name      = "${local.prefix}-${local.location_short}-k8sRuleGroup"
  parent_id = azurerm_resource_group.stamp.id
  location  = azurerm_resource_group.stamp.location

  body = jsonencode({
    properties = {
      description = "Prometheus Rule Group"
      scopes      = [data.azapi_resource.prometheus.id]
      enabled     = true
      clusterName = azurerm_kubernetes_cluster.stamp.name
      interval    = "PT1M"

      rules = [
        # Recorded from node_cpu_seconds_total over a 5m rate window.
        {
          record     = "instance:node_cpu_utilisation:rate5m"
          expression = "1 - avg without (cpu) (sum without (mode)(rate(node_cpu_seconds_total{job=\"node\", mode=~\"idle|iowait|steal\"}[5m])))"
          labels = {
            workload_type = "job"
          }
          enabled = true
        },
        # Recorded from container_cpu_usage_seconds_total, joined with kube_pod_info
        # to attach the node label.
        {
          record     = "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate"
          expression = "sum by (cluster, namespace, pod, container) ( irate(container_cpu_usage_seconds_total{job=\"cadvisor\", image!=\"\"}[5m])) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) ( 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=\"\"}))"
          labels = {
            workload_type = "job"
          }
          enabled = true
        }
      ]
    }
  })
}
# Prometheus rule group with a single node-level recording rule, scoped to the
# monitoring account from data.azapi_resource.prometheus and tied to the stamp's AKS
# cluster by name. The group is evaluated at an interval of PT1M (one minute).
resource "azapi_resource" "prometheusNodeRuleGroup" {
  type      = "Microsoft.AlertsManagement/prometheusRuleGroups@2021-07-22-preview"
  name      = "${local.prefix}-${local.location_short}-nodeRuleGroup"
  parent_id = azurerm_resource_group.stamp.id
  location  = azurerm_resource_group.stamp.location

  body = jsonencode({
    properties = {
      description = "Prometheus Rule Group"
      scopes      = [data.azapi_resource.prometheus.id]
      enabled     = true
      clusterName = azurerm_kubernetes_cluster.stamp.name
      interval    = "PT1M"

      rules = [
        # NOTE(review): the expression divides by `instance:node_num_cpu:sum`, which is
        # not recorded by any rule group visible in this file - presumably it is
        # provided elsewhere; verify, otherwise this rule yields no samples.
        {
          record     = "instance:node_load1_per_cpu:ratio"
          expression = "( node_load1{job=\"node\"}/ instance:node_num_cpu:sum{job=\"node\"})"
          labels = {
            workload_type = "job"
          }
          enabled = true
        }
      ]
    }
  })
}

Просмотреть файл

@ -2,7 +2,7 @@ terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
version = "3.55.0"
version = "3.61.0"
}
azapi = {
source = "azure/azapi"

Просмотреть файл

@ -163,6 +163,17 @@
"node": ">=14"
}
},
"node_modules/playwright-chromium/node_modules/playwright-core": {
"version": "1.31.1",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.31.1.tgz",
"integrity": "sha512-JTyX4kV3/LXsvpHkLzL2I36aCdml4zeE35x+G5aPc4bkLsiRiQshU5lWeVpHFAuC8xAcbI6FDcw/8z3q2xtJSQ==",
"bin": {
"playwright": "cli.js"
},
"engines": {
"node": ">=14"
}
},
"node_modules/playwright-core": {
"version": "1.33.0",
"resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.33.0.tgz",