Set up a dedicated dyno to schedule and execute Google AutoML Translate Warmup process (#2682)

Usually, separating the scheduler from the execution worker is recommended, but we intentionally break that pattern here in order to run the task exactly when it is scheduled.

Co-authored-by: Francesco Lodolo <flod@lodolo.net>
This commit is contained in:
Matjaž Horvat 2023-02-16 10:06:39 +01:00 коммит произвёл GitHub
Родитель 1192cb7054
Коммит ceef69803d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 69 добавлений и 70 удалений

Просмотреть файл

@ -1,3 +1,3 @@
web: newrelic-admin run-program gunicorn pontoon.wsgi:application -t 120 --log-file -
worker: newrelic-admin run-program celery --app=pontoon.base.celeryapp worker --loglevel=info --without-gossip --without-mingle --without-heartbeat
clock: newrelic-admin run-program celery --app=pontoon.base.clock beat
automl-warmup: newrelic-admin run-program python pontoon/machinery/automl_warmup.py

Просмотреть файл

@ -1,43 +0,0 @@
"""
Custom clock process is essentially a cron tool replacement for horizontally scalable
environments such as Heroku. It acts as a job scheduler and supports more specific
execution intervals than the Scheduler add-on.
More information:
https://devcenter.heroku.com/articles/scheduled-jobs-custom-clock-processes#custom-clock-processes
This file defines a clock process. It's used by Procfile.
Add scheduled jobs in the setup_periodic_tasks() function.
"""
import django
import dotenv
import os
from celery import Celery
# Read dotenv file and inject its values into the environment.
# NOTE: must run before django.setup() so settings can read these variables.
dotenv.load_dotenv(dotenv_path=os.environ.get("DOTENV_PATH"))
# Set the default Django settings module
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pontoon.settings")
# Needed for standalone Django usage
django.setup()
# These imports require a configured Django, hence they come after django.setup().
from django.conf import settings # noqa
from pontoon.machinery.tasks import warm_up_automl_models # noqa
# Configure Celery using the Django settings
app = Celery()
app.config_from_object("django.conf:settings")
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register periodic jobs on the Celery beat scheduler once it is configured."""
    # Interval is in seconds (settings default: 300 — see GOOGLE_AUTOML_WARMUP_INTERVAL).
    sender.add_periodic_task(
        settings.GOOGLE_AUTOML_WARMUP_INTERVAL,
        warm_up_automl_models.s(),
    )

Просмотреть файл

@ -0,0 +1,47 @@
"""
Google Cloud AutoML Translation has latency of ~15s, caused by the loading time of a
custom model into the chip. To keep latency low, we need to make regular dummy warmup
requests, which is what this script does.
"""
import django
import dotenv
import logging
import os

from apscheduler.schedulers.blocking import BlockingScheduler

# Inject the values from the dotenv file into the environment.
# This must happen before django.setup() so settings can pick them up.
dotenv.load_dotenv(dotenv_path=os.environ.get("DOTENV_PATH"))

# Point Django at the project settings module.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pontoon.settings")

# Required for using Django outside of a web server process.
django.setup()

# These imports need a configured Django, hence they come after django.setup().
from django.conf import settings # noqa
from pontoon.base.models import Locale # noqa
from pontoon.machinery.utils import get_google_automl_translation # noqa

logging.basicConfig(level=logging.INFO, format="%(message)s")
log = logging.getLogger(__name__)

scheduler = BlockingScheduler()


@scheduler.scheduled_job("interval", seconds=settings.GOOGLE_AUTOML_WARMUP_INTERVAL)
def warm_up_automl_models():
    """Fire a dummy translation request for every locale with a custom AutoML model."""
    log.info("Google AutoML Warmup process started.")

    for locale in Locale.objects.exclude(google_automl_model="").order_by("code"):
        get_google_automl_translation("t", locale)
        log.info(f"Google AutoML Warmup for {locale.code} complete.")

    log.info("Google AutoML Warmup process complete for all locales.")


# Blocks forever, running the job on the configured interval.
scheduler.start()

Просмотреть файл

@ -1,22 +0,0 @@
import logging

from celery import shared_task

from pontoon.base.models import Locale
from pontoon.machinery.utils import get_google_automl_translation

log = logging.getLogger(__name__)


@shared_task(bind=True)
def warm_up_automl_models(self):
    """Send a dummy translation request for each locale with a custom AutoML model."""
    log.info("Google AutoML Warmup process started.")

    warmup_locales = Locale.objects.exclude(google_automl_model="").order_by("code")
    for locale in warmup_locales:
        get_google_automl_translation("t", locale)
        log.info(f"Google AutoML Warmup for {locale.code} complete.")

    log.info("Google AutoML Warmup process complete for all locales.")

Просмотреть файл

@ -105,10 +105,8 @@ GOOGLE_TRANSLATE_API_KEY = os.environ.get("GOOGLE_TRANSLATE_API_KEY", "")
# Google Cloud AutoML Translation Project ID
GOOGLE_AUTOML_PROJECT_ID = os.environ.get("GOOGLE_AUTOML_PROJECT_ID", "")
# Google Cloud AutoML Translation has latency of ~15s, caused by the loading time of a
# custom model into the chip. To keep latency low, we need to make regular dummy warm-up
# requests. It is recommended to make these requests every minute, although in our
# experience every 5 minutes (300 seconds) is sufficient.
# It is recommended to make Google Cloud AutoML Translation warmup requests every minute,
# although in our experience every 5 minutes (300 seconds) is sufficient.
# Interval in seconds; parsed as float so fractional intervals are also accepted.
GOOGLE_AUTOML_WARMUP_INTERVAL = float(
    os.environ.get("GOOGLE_AUTOML_WARMUP_INTERVAL", "300")
)

Просмотреть файл

@ -14,6 +14,7 @@
# The dependencies are sorted by alphabetical order.
# Dependencies that do not come from pypi (eg. eggs from github) are listed at the end of the list.
# -------------------------------------------------------------------------------------------------
APScheduler==3.9.1.post1
bleach==3.3.0
celery==5.2.6
compare-locales==8.2.0

Просмотреть файл

@ -12,6 +12,10 @@ aniso8601==7.0.0 \
--hash=sha256:513d2b6637b7853806ae79ffaca6f3e8754bdd547048f5ccc1420aec4b714f1e \
--hash=sha256:d10a4bf949f619f719b227ef5386e31f49a2b6d453004b21f02661ccc8670c7b
# via graphene
apscheduler==3.9.1.post1 \
--hash=sha256:b2bea0309569da53a7261bfa0ce19c67ddbfe151bda776a6a907579fdbd3eb2a \
--hash=sha256:c8c618241dbb2785ed5a687504b14cb1851d6f7b5a4edf3a51e39cc6a069967a
# via -r requirements/default.in
asgiref==3.4.1 \
--hash=sha256:4ef1ab46b484e3c706329cedeff284a5d40824200638503f5768edb6de7d58e9 \
--hash=sha256:ffc141aa908e6f175673e7b1b3b7af4fdb0ecb738fc5c8b88f69f055c2415214
@ -665,10 +669,15 @@ pytz==2022.1 \
--hash=sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c
# via
# -r requirements/default.in
# apscheduler
# celery
# django
# django-dirtyfields
# django-notifications-hq
pytz-deprecation-shim==0.1.0.post0 \
--hash=sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6 \
--hash=sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d
# via tzlocal
raygun4py==4.3.0 \
--hash=sha256:9a675da0215df310a929ef8fe4f9f6c7882b3ee44a925d73e5c191fd962d7c4d
# via -r requirements/default.in
@ -763,6 +772,7 @@ six==1.16.0 \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
# via
# apscheduler
# bleach
# click-repl
# compare-locales
@ -797,6 +807,14 @@ tqdm==4.62.3 \
translate-toolkit==3.3.2 \
--hash=sha256:0795bd3c8668213199550ae4ed8938874083139ec1f8c473dcca1524a206b108
# via -r requirements/default.in
tzdata==2022.7 \
--hash=sha256:2b88858b0e3120792a3c0635c23daf36a7d7eeeca657c323da299d2094402a0d \
--hash=sha256:fe5f866eddd8b96e9fcba978f8e503c909b19ea7efda11e52e39494bad3a7bfa
# via pytz-deprecation-shim
tzlocal==4.2 \
--hash=sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745 \
--hash=sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7
# via apscheduler
uhashring==2.1 \
--hash=sha256:b21340d0d32497a67f34f5177a64908115fdc23264ed87fa7d1eca79ef9641fa
# via python-binary-memcached