Mirror of https://github.com/mozilla/treeherder.git
Publish ingestion metrics on statsd (#7875)
* Add statsd service
* Add celery task to publish stats
* Add task to prod misc worker
* Fix beat task
* Aggregate stats
* Publish on statsd
* Fix date range filter
* Allow to change statsd host and port
* Add a prefix for stats publication
Parent: c13a218373
Commit: 307abe8106
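At a high level, the commit adds a Celery beat entry that fires a publish-stats task every few minutes; the task counts the pushes and jobs ingested during the elapsed window and publishes counters to a statsd daemon. A minimal sketch of the client-side pattern, not part of the diff (the statsd 4.0.1 package, host name, and port mirror what the commit wires up below):

    import statsd

    # Counters travel over UDP; metric names get the 'treeherder.' prefix
    client = statsd.StatsClient('statsd', 8125, prefix='treeherder')
    client.incr('push', 10)  # increments treeherder.push by 10
    client.incr('jobs', 11)  # increments treeherder.jobs by 11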
@@ -9,3 +9,6 @@ ui/partials/
 
 # Ignore markdown
 *.md
+
+# Statsd configuration file
+docker/statsd_config.js

@@ -148,7 +148,7 @@ services:
       - DATABASE_URL=mysql://root@mysql:3306/treeherder
       - PROJECTS_TO_INGEST=${PROJECTS_TO_INGEST:-autoland,try}
     entrypoint: './docker/entrypoint.sh'
-    command: celery -A treeherder worker --uid=nobody --gid=nogroup --without-gossip --without-mingle --without-heartbeat -Q store_pulse_pushes,store_pulse_tasks,store_pulse_tasks_classification --concurrency=1 --loglevel=INFO
+    command: celery -A treeherder worker --uid=nobody --gid=nogroup --without-gossip --without-mingle --without-heartbeat -Q store_pulse_pushes,store_pulse_tasks,store_pulse_tasks_classification,statsd --concurrency=1 --loglevel=INFO
     volumes:
       - .:/app
     depends_on:
@@ -157,6 +157,15 @@ services:
       - redis
       - rabbitmq
     platform: linux/amd64
+
+  statsd:
+    container_name: statsd
+    image: statsd/statsd:v0.10.2
+    volumes:
+      - ./docker/statsd_config.js:/usr/src/app/config.js
+    ports:
+      - '8125:8125'
+
 volumes:
   # TODO: Experiment with using tmpfs when testing, to speed up database-using Python tests.
   mysql_data: {}

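The statsd daemon speaks a plain-text protocol over UDP, so the new service above can be smoke-tested without any client library. A hedged sketch (assumes the container is running with port 8125 mapped as configured):

    import socket

    # statsd line protocol: <name>:<value>|c increments a counter
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.sendto(b'treeherder.smoke_test:1|c', ('localhost', 8125))

Because dumpMessages is enabled in docker/statsd_config.js below, the received metric shows up in the container's logs.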
@@ -56,7 +56,7 @@ elif [ "$1" == "worker_log_parser_fail_json_unsheriffed" ]; then
 # Tasks that don't need a dedicated worker.
 elif [ "$1" == "worker_misc" ]; then
     export REMAP_SIGTERM=SIGQUIT
-    exec newrelic-admin run-program celery -A treeherder worker --without-gossip --without-mingle --without-heartbeat -Q default,generate_perf_alerts,pushlog --concurrency=3
+    exec newrelic-admin run-program celery -A treeherder worker --without-gossip --without-mingle --without-heartbeat -Q default,generate_perf_alerts,pushlog,statsd --concurrency=3
 
 # Cron jobs
 elif [ "$1" == "run_intermittents_commenter" ]; then

@@ -0,0 +1,8 @@
+{
+  debug: true,
+  healthStatus: true,
+  dumpMessages: true,
+  port: 8125,
+  // InfluxDB backend should be added to read stats
+  backends: []
+}

@@ -44,3 +44,6 @@ moz-measure-noise==2.60.1
 
 # Used in the intermittents commenter
 jinja2==3.1.2
+
+# Client to publish runtime statistics to statsd
+statsd==4.0.1

@@ -1,6 +1,6 @@
 #
-# This file is autogenerated by pip-compile with python 3.9
-# To update, run:
+# This file is autogenerated by pip-compile with Python 3.9
+# by the following command:
 #
 #    pip-compile --generate-hashes --output-file=requirements/common.txt requirements/common.in
 #
@@ -101,10 +101,6 @@ amqp==5.1.1 \
     --hash=sha256:2c1b13fecc0893e946c65cbd5f36427861cffa4ea2201d8f6fca22e2a373b5e2 \
     --hash=sha256:6f0956d2c23d8fa6e7691934d8c3930eadb44972cbbd1a7ae3a520f735d43359
     # via kombu
-ansicon==1.89.0 \
-    --hash=sha256:e4d039def5768a47e4afec8e89e83ec3ae5a26bf00ad851f914d1240b444d2b1 \
-    --hash=sha256:f1def52d17f65c2c9682cf8370c03f541f410c1752d6a14029f97318e4b9dfec
-    # via jinxed
 appdirs==1.4.4 \
     --hash=sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41 \
     --hash=sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128
@@ -353,12 +349,6 @@ click-repl==0.3.0 \
     --hash=sha256:17849c23dba3d667247dc4defe1757fff98694e90fe37474f3feebb69ced26a9 \
     --hash=sha256:fb7e06deb8da8de86180a33a9da97ac316751c094c6899382da7feeeeb51b812
     # via celery
-colorama==0.4.6 \
-    --hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
-    --hash=sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6
-    # via
-    #   click
-    #   loguru
 crashtest==0.4.1 \
     --hash=sha256:80d7b1f316ebfbd429f648076d6275c877ba30ba48979de4191714a75266f0ce \
     --hash=sha256:8d23eac5fa660409f57472e3851dab7ac18aba459a8d19cbbba86d3d5aecd2a5
@@ -495,10 +485,6 @@ jinja2==3.1.2 \
     --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \
     --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61
     # via -r requirements/common.in
-jinxed==1.2.0 \
-    --hash=sha256:032acda92d5c57cd216033cbbd53de731e6ed50deb63eb4781336ca55f72cda5 \
-    --hash=sha256:cfc2b2e4e3b4326954d546ba6d6b9a7a796ddcb0aef8d03161d005177eb0d48b
-    # via blessed
 jmespath==1.0.1 \
     --hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \
     --hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe
@@ -1281,6 +1267,10 @@ sqlparse==0.4.4 \
     --hash=sha256:5430a4fe2ac7d0f93e66f1efc6e1338a41884b7ddf2a350cedd20ccc4d9d28f3 \
     --hash=sha256:d446183e84b8349fa3061f0fe7f06ca94ba65b426946ffebe6e3e8295332420c
     # via django
+statsd==4.0.1 \
+    --hash=sha256:99763da81bfea8daf6b3d22d11aaccb01a8d0f52ea521daab37e758a4ca7d128 \
+    --hash=sha256:c2676519927f7afade3723aca9ca8ea986ef5b059556a980a867721ca69df093
+    # via -r requirements/common.in
 tabulate==0.9.0 \
     --hash=sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c \
     --hash=sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f
@@ -1311,9 +1301,7 @@ typing-extensions==4.7.1 \
 tzdata==2023.3 \
     --hash=sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a \
     --hash=sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda
-    # via
-    #   celery
-    #   django
+    # via celery
 uritemplate==4.1.1 \
     --hash=sha256:4346edfc5c3b79f694bccd6d6099a322bbeb628dbf2cd86eea55a456ce5124f0 \
     --hash=sha256:830c08b8d99bdd312ea4ead05994a38e8936266f84b9a7878232db50b044e02e
@@ -1387,10 +1375,6 @@ whitenoise[brotli]==6.5.0 \
     --hash=sha256:15fe60546ac975b58e357ccaeb165a4ca2d0ab697e48450b8f0307ca368195a8 \
     --hash=sha256:16468e9ad2189f09f4a8c635a9031cc9bb2cdbc8e5e53365407acf99f7ade9ec
     # via -r requirements/common.in
-win32-setctime==1.1.0 \
-    --hash=sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2 \
-    --hash=sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad
-    # via loguru
 yarl==1.9.2 \
     --hash=sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571 \
     --hash=sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3 \

@@ -0,0 +1,52 @@
+import pytest
+from unittest.mock import patch, MagicMock, call
+from treeherder.workers.stats import publish_stats
+from treeherder.model.models import Push, Job
+from django.utils import timezone
+from datetime import timedelta
+
+
+@pytest.mark.django_db
+@patch('treeherder.workers.stats.get_stats_client')
+def test_publish_stats_nothing_to_do(get_worker_mock, django_assert_num_queries, caplog):
+    statsd_client = MagicMock()
+    get_worker_mock.return_value = statsd_client
+    assert Push.objects.count() == 0
+    assert Job.objects.count() == 0
+    with django_assert_num_queries(2):
+        publish_stats()
+    assert [(level, message) for _, level, message in caplog.record_tuples] == [
+        (20, 'Publishing runtime statistics to statsd'),
+        (20, 'Ingested 0 pushes'),
+        (20, 'Ingested 0 jobs in total'),
+    ]
+    assert statsd_client.incr.call_args_list == []
+
+
+@pytest.mark.django_db
+@patch('treeherder.workers.stats.get_stats_client')
+def test_publish_stats(
+    get_worker_mock, eleven_jobs_stored_new_date, django_assert_num_queries, caplog, settings
+):
+    "Test statsd statistics publication task"
+    settings.CELERY_STATS_PUBLICATION_DELAY = 10
+    statsd_client = MagicMock()
+    get_worker_mock.return_value = statsd_client
+    assert Push.objects.count() == 10
+    assert Job.objects.count() == 11
+    Push.objects.update(time=timezone.now() - timedelta(minutes=10))
+    Job.objects.update(end_time=timezone.now() - timedelta(minutes=10))
+
+    with django_assert_num_queries(2):
+        publish_stats()
+    assert [(level, message) for _, level, message in caplog.record_tuples] == [
+        (20, 'Publishing runtime statistics to statsd'),
+        (20, 'Ingested 10 pushes'),
+        (20, 'Ingested 11 jobs in total'),
+    ]
+    assert statsd_client.incr.call_args_list == [
+        call('push', 10),
+        call('jobs', 11),
+        call('jobs_repo.mozilla-central', 11),
+        call('jobs_state.completed', 11),
+    ]

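For reference, the level 20 in the assertions above is logging.INFO, and caplog.record_tuples yields (logger_name, level, message) triples, which is why the first element is discarded in the comprehensions:

    import logging

    assert logging.INFO == 20  # the numeric level the tests compare against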
@@ -15,3 +15,4 @@ app.config_from_object('django.conf:settings', namespace='CELERY')
 
 # Load task modules from all registered Django app configs.
 app.autodiscover_tasks()
+app.autodiscover_tasks(['treeherder.workers.stats'])

@@ -10,6 +10,7 @@ from kombu import Exchange, Queue
 
 from treeherder.config.utils import connection_should_use_tls
 from treeherder.middleware import add_headers_function
+from celery.schedules import crontab
 
 # TODO: Switch to pathlib once using Python 3.
 SRC_DIR = dirname(dirname(dirname(abspath(__file__))))
@@ -342,6 +343,7 @@ CELERY_TASK_QUEUES = [
         routing_key='store_pulse_tasks_classification',
     ),
     Queue('store_pulse_pushes', Exchange('default'), routing_key='store_pulse_pushes'),
+    Queue('statsd', Exchange('default'), routing_key='statsd'),
 ]
 
 # Force all queues to be explicitly listed in `CELERY_TASK_QUEUES` to help prevent typos
@@ -378,6 +380,12 @@ CELERY_TASK_ACKS_LATE = True
 CELERY_TASK_SOFT_TIME_LIMIT = 15 * 60
 CELERY_TASK_TIME_LIMIT = CELERY_TASK_SOFT_TIME_LIMIT + 30
 
+# Periodically publish runtime statistics on statsd (in minutes)
+CELERY_STATS_PUBLICATION_DELAY = 5
+assert (
+    0 < CELERY_STATS_PUBLICATION_DELAY < 60 and 60 % CELERY_STATS_PUBLICATION_DELAY == 0
+), "CELERY_STATS_PUBLICATION_DELAY must be a valid cron delay in minutes"
+
 CELERY_BEAT_SCHEDULE = {
     # this is just a failsafe in case the Pulse ingestion misses something
    'fetch-push-logs-every-5-minutes': {
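The divisibility check above matters because crontab(minute=f'*/{N}') fires on the minutes of each hour divisible by N and starts over at the top of the next hour; if N does not divide 60, the last window of each hour is shorter than N minutes and the ingestion windows no longer tile cleanly. A standalone illustration (plain Python, not part of the commit):

    def fire_minutes(n):
        # crontab(minute=f'*/{n}') fires whenever the current minute is divisible by n
        return [m for m in range(60) if m % n == 0]

    print(fire_minutes(5))  # [0, 5, ..., 55] -> uniform 5-minute windows
    print(fire_minutes(7))  # [0, 7, ..., 56] -> only 4 minutes before the :00 firing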
@@ -386,6 +394,12 @@ CELERY_BEAT_SCHEDULE = {
         'relative': True,
         'options': {"queue": "pushlog"},
     },
+    'publish_stats': {
+        'task': 'publish-stats',
+        'schedule': crontab(minute=f'*/{CELERY_STATS_PUBLICATION_DELAY}'),
+        'relative': True,
+        'options': {'queue': 'statsd'},
+    },
 }
 
 # CORS Headers
@@ -490,5 +504,10 @@ NOTIFY_ACCESS_TOKEN = env('NOTIFY_ACCESS_TOKEN', default=None)
 # https://github.com/settings/tokens
 GITHUB_TOKEN = env("GITHUB_TOKEN", default=None)
 
+# Statsd server configuration
+STATSD_HOST = env('STATSD_HOST', default='statsd')
+STATSD_PORT = env('STATSD_PORT', default=8125)
+STATSD_PREFIX = env('STATSD_PREFIX', default='treeherder')
+
 # For dockerflow
 BASE_DIR = SRC_DIR

@@ -0,0 +1,76 @@
+from celery import shared_task
+from django.conf import settings
+from django.utils import timezone
+from datetime import timedelta
+from django.db.models import Count
+from treeherder.model.models import Push, Job
+from itertools import groupby
+import statsd
+import logging
+
+logging.basicConfig()
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+def get_stats_client():
+    return statsd.StatsClient(
+        settings.STATSD_HOST, settings.STATSD_PORT, prefix=settings.STATSD_PREFIX
+    )
+
+
+@shared_task(name='publish-stats')
+def publish_stats():
+    """
+    Publish runtime stats on statsd
+    """
+    stats_client = get_stats_client()
+    logger.info('Publishing runtime statistics to statsd')
+    end_date = timezone.now()
+    # Round the date to the current date range
+    # This should not overlap as the beat is set as a relative cron-based delay in minutes
+    end_date = end_date - timedelta(
+        minutes=end_date.minute % settings.CELERY_STATS_PUBLICATION_DELAY,
+        seconds=end_date.second,
+        microseconds=end_date.microsecond,
+    )
+
+    start_date = end_date - timedelta(minutes=settings.CELERY_STATS_PUBLICATION_DELAY)
+    logger.debug(f'Reading data ingested from {start_date} to {end_date}')
+
+    # Nb of pushes
+    pushes_count = Push.objects.filter(time__lte=end_date, time__gt=start_date).count()
+    logger.info(f'Ingested {pushes_count} pushes')
+    if pushes_count:
+        stats_client.incr('push', pushes_count)
+
+    # Compute stats for jobs in a single request
+    jobs_stats = (
+        Job.objects.filter(end_time__lte=end_date, end_time__gt=start_date)
+        .values('push__repository__name', 'state')
+        .annotate(count=Count('id'))
+        .values_list('push__repository__name', 'state', 'count')
+    )
+    # nb of job total
+    jobs_total = sum(ct for _, _, ct in jobs_stats)
+    logger.info(f'Ingested {jobs_total} jobs in total')
+    if jobs_total:
+        stats_client.incr('jobs', jobs_total)
+
+    # nb of job per repo
+    jobs_per_repo = {
+        key: sum(ct for k, ct in vals)
+        for key, vals in groupby(sorted((repo, ct) for repo, _, ct in jobs_stats), lambda x: x[0])
+    }
+    logger.debug(f'Jobs per repo: {jobs_per_repo}')
+    for key, value in jobs_per_repo.items():
+        stats_client.incr(f'jobs_repo.{key}', value)
+
+    # nb of job per state
+    jobs_per_state = {
+        key: sum(ct for k, ct in vals)
+        for key, vals in groupby(sorted((state, ct) for _, state, ct in jobs_stats), lambda x: x[0])
+    }
+    logger.debug(f'Jobs per state: {jobs_per_state}')
+    for key, value in jobs_per_state.items():
+        stats_client.incr(f'jobs_state.{key}', value)
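To make the window rounding above concrete: with the default 5-minute delay, a beat that fires slightly late still reports on the same aligned slot. A standalone sketch of the same arithmetic (hypothetical timestamp, plain datetime instead of django.utils.timezone):

    from datetime import datetime, timedelta

    DELAY = 5  # minutes, mirroring CELERY_STATS_PUBLICATION_DELAY
    now = datetime(2023, 1, 1, 12, 7, 42)  # task actually starts at 12:07:42
    end_date = now - timedelta(
        minutes=now.minute % DELAY, seconds=now.second, microseconds=now.microsecond
    )
    start_date = end_date - timedelta(minutes=DELAY)
    print(start_date, end_date)  # 2023-01-01 12:00:00  2023-01-01 12:05:00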