Mirror of https://github.com/mozilla/pontoon.git
Set up a dedicated dyno to schedule and execute Google AutoML Translate Warmup process (#2682)
Usually, separating scheduler and execution worker is recommended, but we break that pattern intentionally in order to run the task exactly when scheduled. Co-authored-by: Francesco Lodolo <flod@lodolo.net>
This commit is contained in:
Parent
1192cb7054
Commit
ceef69803d
2
Procfile
2
Procfile
|
@ -1,3 +1,3 @@
|
|||
web: newrelic-admin run-program gunicorn pontoon.wsgi:application -t 120 --log-file -
|
||||
worker: newrelic-admin run-program celery --app=pontoon.base.celeryapp worker --loglevel=info --without-gossip --without-mingle --without-heartbeat
|
||||
clock: newrelic-admin run-program celery --app=pontoon.base.clock beat
|
||||
automl-warmup: newrelic-admin run-program python pontoon/machinery/automl_warmup.py
|
||||
|
|
|
@ -1,43 +0,0 @@
|
|||
"""
|
||||
Custom clock process is essentially a cron tool replacement for horizontally scalable
|
||||
environments such as Heroku. It acts as a job scheduler and supports more specific
|
||||
execution intervals than the Scheduler add-on.
|
||||
|
||||
More information:
|
||||
https://devcenter.heroku.com/articles/scheduled-jobs-custom-clock-processes#custom-clock-processes
|
||||
|
||||
This file defines a clock process. It's used by Procfile.
|
||||
|
||||
Add scheduled jobs in the setup_periodic_tasks() function.
|
||||
"""
|
||||
|
||||
import django
|
||||
import dotenv
|
||||
import os
|
||||
|
||||
from celery import Celery
|
||||
|
||||
|
||||
# Read dotenv file and inject its values into the environment
|
||||
dotenv.load_dotenv(dotenv_path=os.environ.get("DOTENV_PATH"))
|
||||
|
||||
# Set the default Django settings module
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pontoon.settings")
|
||||
|
||||
# Needed for standalone Django usage
|
||||
django.setup()
|
||||
|
||||
from django.conf import settings # noqa
|
||||
from pontoon.machinery.tasks import warm_up_automl_models # noqa
|
||||
|
||||
# Configure Celery using the Django settings
|
||||
app = Celery()
|
||||
app.config_from_object("django.conf:settings")
|
||||
|
||||
|
||||
@app.on_after_configure.connect
def setup_periodic_tasks(sender, **kwargs):
    """Register the AutoML warmup task on the Celery beat schedule.

    Runs once after the Celery app is configured; the task fires every
    GOOGLE_AUTOML_WARMUP_INTERVAL seconds.
    """
    interval = settings.GOOGLE_AUTOML_WARMUP_INTERVAL
    warmup_signature = warm_up_automl_models.s()
    sender.add_periodic_task(interval, warmup_signature)
|
|
@ -0,0 +1,47 @@
|
|||
"""
|
||||
Google Cloud AutoML Translation has latency of ~15s, caused by the loading time of a
|
||||
custom model into the chip. To keep latency low, we need to make regular dummy warmup
|
||||
requests, which is what this script does.
|
||||
"""
|
||||
|
||||
import django
|
||||
import dotenv
|
||||
import logging
|
||||
import os
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
|
||||
|
||||
# Read dotenv file and inject its values into the environment
|
||||
dotenv.load_dotenv(dotenv_path=os.environ.get("DOTENV_PATH"))
|
||||
|
||||
# Set the default Django settings module
|
||||
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "pontoon.settings")
|
||||
|
||||
# Needed for standalone Django usage
|
||||
django.setup()
|
||||
|
||||
from django.conf import settings # noqa
|
||||
from pontoon.base.models import Locale # noqa
|
||||
from pontoon.machinery.utils import get_google_automl_translation # noqa
|
||||
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(message)s")
|
||||
log = logging.getLogger(__name__)
|
||||
sched = BlockingScheduler()
|
||||
|
||||
|
||||
@sched.scheduled_job("interval", seconds=settings.GOOGLE_AUTOML_WARMUP_INTERVAL)
|
||||
def warm_up_automl_models():
|
||||
log.info("Google AutoML Warmup process started.")
|
||||
|
||||
locales = Locale.objects.exclude(google_automl_model="").order_by("code")
|
||||
|
||||
for locale in locales:
|
||||
get_google_automl_translation("t", locale)
|
||||
log.info(f"Google AutoML Warmup for {locale.code} complete.")
|
||||
|
||||
log.info("Google AutoML Warmup process complete for all locales.")
|
||||
|
||||
|
||||
# Blocks forever, executing scheduled jobs at their configured intervals.
sched.start()
|
|
@ -1,22 +0,0 @@
|
|||
import logging
|
||||
|
||||
from celery import shared_task
|
||||
|
||||
from pontoon.base.models import Locale
|
||||
from pontoon.machinery.utils import get_google_automl_translation
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@shared_task(bind=True)
def warm_up_automl_models(self):
    """Send a dummy translation request for every locale with a custom AutoML model.

    Keeps each custom model loaded on Google's side so that real requests
    don't pay the model-loading latency.
    """
    log.info("Google AutoML Warmup process started.")

    for locale in Locale.objects.exclude(google_automl_model="").order_by("code"):
        # The translated text is irrelevant; the request itself keeps the model warm.
        get_google_automl_translation("t", locale)
        log.info(f"Google AutoML Warmup for {locale.code} complete.")

    log.info("Google AutoML Warmup process complete for all locales.")
|
|
@ -105,10 +105,8 @@ GOOGLE_TRANSLATE_API_KEY = os.environ.get("GOOGLE_TRANSLATE_API_KEY", "")
|
|||
# Google Cloud AutoML Translation Project ID
|
||||
GOOGLE_AUTOML_PROJECT_ID = os.environ.get("GOOGLE_AUTOML_PROJECT_ID", "")
|
||||
|
||||
# Google Cloud AutoML Translation has latency of ~15s, caused by the loading time of a
|
||||
# custom model into the chip. To keep latency low, we need to make regular dummy warm-up
|
||||
# requests. It is recommended to make these requests every minute, although in our
|
||||
# experience every 5 minutes (300 seconds) is sufficient.
|
||||
# It is recommended to make Google Cloud AutoML Translation warmup requests every minute,
|
||||
# although in our experience every 5 minutes (300 seconds) is sufficient.
|
||||
GOOGLE_AUTOML_WARMUP_INTERVAL = float(
|
||||
os.environ.get("GOOGLE_AUTOML_WARMUP_INTERVAL", "300")
|
||||
)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
# The dependencies are sorted by alphabetical order.
|
||||
# Dependencies that do not come from pypi (eg. eggs from github) are listed at the end of the list.
|
||||
# -------------------------------------------------------------------------------------------------
|
||||
APScheduler==3.9.1.post1
|
||||
bleach==3.3.0
|
||||
celery==5.2.6
|
||||
compare-locales==8.2.0
|
||||
|
|
|
@ -12,6 +12,10 @@ aniso8601==7.0.0 \
|
|||
--hash=sha256:513d2b6637b7853806ae79ffaca6f3e8754bdd547048f5ccc1420aec4b714f1e \
|
||||
--hash=sha256:d10a4bf949f619f719b227ef5386e31f49a2b6d453004b21f02661ccc8670c7b
|
||||
# via graphene
|
||||
apscheduler==3.9.1.post1 \
|
||||
--hash=sha256:b2bea0309569da53a7261bfa0ce19c67ddbfe151bda776a6a907579fdbd3eb2a \
|
||||
--hash=sha256:c8c618241dbb2785ed5a687504b14cb1851d6f7b5a4edf3a51e39cc6a069967a
|
||||
# via -r requirements/default.in
|
||||
asgiref==3.4.1 \
|
||||
--hash=sha256:4ef1ab46b484e3c706329cedeff284a5d40824200638503f5768edb6de7d58e9 \
|
||||
--hash=sha256:ffc141aa908e6f175673e7b1b3b7af4fdb0ecb738fc5c8b88f69f055c2415214
|
||||
|
@ -665,10 +669,15 @@ pytz==2022.1 \
|
|||
--hash=sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c
|
||||
# via
|
||||
# -r requirements/default.in
|
||||
# apscheduler
|
||||
# celery
|
||||
# django
|
||||
# django-dirtyfields
|
||||
# django-notifications-hq
|
||||
pytz-deprecation-shim==0.1.0.post0 \
|
||||
--hash=sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6 \
|
||||
--hash=sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d
|
||||
# via tzlocal
|
||||
raygun4py==4.3.0 \
|
||||
--hash=sha256:9a675da0215df310a929ef8fe4f9f6c7882b3ee44a925d73e5c191fd962d7c4d
|
||||
# via -r requirements/default.in
|
||||
|
@ -763,6 +772,7 @@ six==1.16.0 \
|
|||
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
|
||||
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
|
||||
# via
|
||||
# apscheduler
|
||||
# bleach
|
||||
# click-repl
|
||||
# compare-locales
|
||||
|
@ -797,6 +807,14 @@ tqdm==4.62.3 \
|
|||
translate-toolkit==3.3.2 \
|
||||
--hash=sha256:0795bd3c8668213199550ae4ed8938874083139ec1f8c473dcca1524a206b108
|
||||
# via -r requirements/default.in
|
||||
tzdata==2022.7 \
|
||||
--hash=sha256:2b88858b0e3120792a3c0635c23daf36a7d7eeeca657c323da299d2094402a0d \
|
||||
--hash=sha256:fe5f866eddd8b96e9fcba978f8e503c909b19ea7efda11e52e39494bad3a7bfa
|
||||
# via pytz-deprecation-shim
|
||||
tzlocal==4.2 \
|
||||
--hash=sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745 \
|
||||
--hash=sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7
|
||||
# via apscheduler
|
||||
uhashring==2.1 \
|
||||
--hash=sha256:b21340d0d32497a67f34f5177a64908115fdc23264ed87fa7d1eca79ef9641fa
|
||||
# via python-binary-memcached
|
||||
|
|
Loading…
Reference in new issue