From d7849ac7af32c9790fc40a8dfc2fdea05a911437 Mon Sep 17 00:00:00 2001 From: Sarah Clements Date: Wed, 18 Aug 2021 18:22:13 +0100 Subject: [PATCH] Revert "heroku cleanup (#7219)" (#7245) This reverts commit 7cb8d8abe71aa9d417b120bfdee3833d7585a860. --- .circleci/config.yml | 3 + .gitattributes | 3 + .slugignore | 9 ++ Procfile | 53 ++++++++ app.json | 106 ++++++++++++++++ bin/post_compile | 31 +++++ bin/pre_deploy | 5 + docker-compose.yml | 5 +- docker/Dockerfile | 7 -- docker/dev.Dockerfile | 2 + docker/entrypoint_prod.sh | 15 ++- lints/queuelint.py | 49 ++++++++ misc/compare_tasks.py | 2 +- package.json | 1 + requirements.txt | 5 + runtime.txt | 1 + .../cycle_data/test_perfherder_cycling.py | 31 ++++- tests/test_setup.py | 13 ++ tox.ini | 24 ++++ treeherder/config/settings.py | 28 +++-- treeherder/config/utils.py | 32 ++++- .../management/commands/heroku_environment.py | 116 ++++++++++++++++++ treeherder/middleware.py | 2 +- treeherder/model/data_cycling/utils.py | 9 +- .../model/management/commands/cycle_data.py | 6 +- ui/taskcluster-auth-callback/constants.js | 6 +- 26 files changed, 533 insertions(+), 31 deletions(-) create mode 100644 .slugignore create mode 100644 Procfile create mode 100644 app.json create mode 100755 bin/post_compile create mode 100755 lints/queuelint.py create mode 100644 requirements.txt create mode 100644 runtime.txt create mode 100755 treeherder/etl/management/commands/heroku_environment.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 5da05f023..309a0d9aa 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,6 +32,9 @@ jobs: keys: - node-deps-v1-{{ .Branch }} - run: pip install tox + - run: + command: tox -e heroku + name: Run heroku builds and release checks - run: command: tox -e docs name: Build docs diff --git a/.gitattributes b/.gitattributes index db8bf4294..4ca15cbe3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -115,6 +115,9 @@ browserslist text eol=lf Makefile text eol=lf makefile text eol=lf +## HEROKU +Procfile text eol=lf +.slugignore text eol=lf ## GRAPHICS *.ai binary diff --git a/.slugignore b/.slugignore new file mode 100644 index 000000000..3603ef1bb --- /dev/null +++ b/.slugignore @@ -0,0 +1,9 @@ +# Files/directories to be excluded from the Heroku build, to reduce +# the resultant slug size and thus speed up dyno deploys. See: +# https://devcenter.heroku.com/articles/slug-compiler#ignoring-files-with-slugignore +# Anything not committed to version control (for example `node_modules`) +# is automatically excluded so does not need to be listed here. + +/docker/ +/docs/ +/tests/ diff --git a/Procfile b/Procfile new file mode 100644 index 000000000..53be5b3a0 --- /dev/null +++ b/Procfile @@ -0,0 +1,53 @@ +# This file defines the processes that will be run on Heroku. +# Each line must be in the format `: `. +# https://devcenter.heroku.com/articles/how-heroku-works#knowing-what-to-execute +# https://devcenter.heroku.com/articles/procfile + +# The `release` process type specifies the command to run during deployment, and is where +# we run DB migrations and other tasks that are 'release' rather than 'build' specific: +# https://devcenter.heroku.com/articles/release-phase +# https://devcenter.heroku.com/articles/runtime-principles#build-release-run +release: ./bin/pre_deploy + +# The `web` process type is the only one that receives external traffic from Heroku's routers. 
+# We set the maximum request duration to 20 seconds, to ensure that poorly performing API +# queries do not consume a gunicorn worker for unbounded lengths of time. See: +# https://devcenter.heroku.com/articles/python-gunicorn +# The Heroku Python buildpack sets some sensible gunicorn defaults via environment variables: +# https://github.com/heroku/heroku-buildpack-python/blob/master/vendor/python.gunicorn.sh +# https://github.com/heroku/heroku-buildpack-python/blob/master/vendor/WEB_CONCURRENCY.sh +# TODO: Experiment with different dyno sizes and gunicorn concurrency/worker types (bug 1175472). +web: newrelic-admin run-program gunicorn treeherder.config.wsgi:application --timeout 20 + +# All other process types can have arbitrary names. +# The Celery options such as `--without-heartbeat` are from the recommendations here: +# https://www.cloudamqp.com/docs/celery.html +# The REMAP_SIGTERM is as recommended by: +# https://devcenter.heroku.com/articles/celery-heroku#using-remap_sigterm + +# This schedules (but does not run itself) the cron-like tasks listed in `CELERY_BEAT_SCHEDULE`. +# However we're moving away from using this in favour of the Heroku scheduler addon. +# NB: This should not be scaled up to more than 1 dyno otherwise duplicate tasks will be scheduled. +# TODO: Move the remaining tasks to the addon and remove this process type (deps of bug 1176492). +celery_scheduler: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery beat -A treeherder + +# Push/job data is consumed from exchanges on pulse.mozilla.org using these kombu-powered +# Django management commands. They do not ingest the data themselves, instead adding tasks +# to the `store_pulse_{pushes,jobs}` queues for `worker_store_pulse_data` to process. +# NB: These should not be scaled up to more than 1 of each. +# TODO: Merge these two listeners into one since they use so little CPU each (bug 1530965). +pulse_listener_pushes: newrelic-admin run-program ./manage.py pulse_listener_pushes +pulse_listener_tasks: newrelic-admin run-program ./manage.py pulse_listener_tasks + +# Processes pushes/jobs from Pulse that were collected by `pulse_listener_{pushes,tasks}`. +worker_store_pulse_data: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q store_pulse_pushes,store_pulse_tasks --concurrency=3 + +# Handles the log parsing tasks scheduled by `worker_store_pulse_data` as part of job ingestion. 
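+# NB: every queue name passed via `-Q` below must also appear in `CELERY_TASK_QUEUES`
+# in treeherder/config/settings.py; lints/queuelint.py (run via tox) fails the build
+# if the two lists ever drift apart.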
+worker_log_parser: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser --concurrency=7 +worker_log_parser_fail_raw_sheriffed: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser_fail_raw_sheriffed --concurrency=1 +worker_log_parser_fail_raw_unsheriffed: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser_fail_raw_unsheriffed --concurrency=1 +worker_log_parser_fail_json_sheriffed: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser_fail_json_sheriffed --concurrency=7 +worker_log_parser_fail_json_unsheriffed: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q log_parser_fail_json_unsheriffed --concurrency=7 + +# Tasks that don't need a dedicated worker. +worker_misc: REMAP_SIGTERM=SIGQUIT newrelic-admin run-program celery worker -A treeherder --without-gossip --without-mingle --without-heartbeat -Q default,generate_perf_alerts,pushlog,seta_analyze_failures --concurrency=3 diff --git a/app.json b/app.json new file mode 100644 index 000000000..ccc7ca4f5 --- /dev/null +++ b/app.json @@ -0,0 +1,106 @@ +{ + "addons": [ + "scheduler", + "cloudamqp:tiger", + "heroku-redis:premium-0", + "papertrail:choklad", + { + "plan": "jawsdb:blacktip", + "options": { + "version": "5.7" + } + } + ], + "buildpacks": [ + { + "url": "https://buildpack-registry.s3.amazonaws.com/buildpacks/edmorley/timestamps.tgz" + }, + { + "url": "heroku/nodejs" + }, + { + "url": "heroku/python" + } + ], + "env": { + "BROKER_URL": { + "value": "$CLOUDAMQP_URL" + }, + "DATABASE_URL": { + "value": "$JAWSDB_URL" + }, + "HEROKU_APP_NAME": { + "required": true + }, + "HEROKU_REVIEW_APP": true, + "LOGGING_LEVEL": "INFO", + "PULSE_QUEUE_NAME": { + "generator": "secret" + }, + "PULSE_RESULSETS_QUEUE_NAME": { + "generator": "secret" + }, + "PULSE_TASKS_QUEUE_NAME": { + "generator": "secret" + }, + "PULSE_AUTO_DELETE_QUEUES": true, + "PROJECTS_TO_INGEST": { + "value": "autoland,try" + }, + "TREEHERDER_DJANGO_SECRET_KEY": { + "generator": "secret" + }, + "YARN_PRODUCTION": { + "value": "true" + } + }, + "formation": { + "celery_scheduler": { + "quantity": 0, + "size": "Standard-1X" + }, + "pulse_listener_pushes": { + "quantity": 1, + "size": "Standard-1X" + }, + "pulse_listener_tasks": { + "quantity": 1, + "size": "Standard-1X" + }, + "web": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_log_parser": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_log_parser_fail_raw_sheriffed": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_log_parser_fail_raw_unsheriffed": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_log_parser_fail_json_sheriffed": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_log_parser_fail_json_unsheriffed": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_misc": { + "quantity": 1, + "size": "Standard-1X" + }, + "worker_store_pulse_data": { + "quantity": 4, + "size": "Standard-1X" + } + }, + "name": "treeherder", + "scripts": {}, + "stack": "heroku-18" +} diff --git a/bin/post_compile b/bin/post_compile new file mode 100755 index 000000000..6f02e2ac5 --- /dev/null +++ b/bin/post_compile @@ -0,0 +1,31 @@ +#!/usr/bin/env bash 
+ +# This script is for running tasks that are 'build' rather than 'release' specific: +# https://devcenter.heroku.com/articles/runtime-principles#build-release-run +# It is run automatically at the end of the Heroku Python buildpack's compile steps, +# which is after pip install and Django collectstatic have been run. +# NB: No changes to external services should be made here (use `pre_deploy` instead). + +# Make non-zero exit codes & other errors fatal. +set -euo pipefail + +# Make the current Git revision accessible at /revision.txt +export REVISION=${SOURCE_VERSION:-$(git rev-parse HEAD)} +echo "$REVISION" > .build/revision.txt +echo "This is the revision of the build: $REVISION" + +# Generate gzipped versions of files that would benefit from compression, that +# WhiteNoise can then serve in preference to the originals. This is required +# since WhiteNoise's Django storage backend only gzips assets handled by +# collectstatic, and so does not affect files in the `.build/` directory +# since they are instead generated by Neutrino/webpack. +python -m whitenoise.compress .build + +# Remove nodejs files created by the Heroku Nodejs buildpack, to reduce slug size +# (and avoid environment variable pollution from the nodejs profile script), since +# they are no longer required after `yarn heroku-postbuild` has run. The buildpack +# cache will still contain them, so this doesn't slow down the next slug compile. +# Only delete if running as part of Heroku because the Travis run takes too long to delete it +if [[ -d .heroku/ ]]; then + rm -r .heroku/node/ .heroku/yarn/ .profile.d/nodejs.sh node_modules/ +fi diff --git a/bin/pre_deploy b/bin/pre_deploy index 73c5ded22..ec52a83a7 100755 --- a/bin/pre_deploy +++ b/bin/pre_deploy @@ -1,6 +1,11 @@ #!/usr/bin/env bash # This script is for running tasks that are 'release' rather than 'build' specific: +# https://devcenter.heroku.com/articles/runtime-principles#build-release-run +# It is referenced via the `release` entry in `Procfile`, and is run after the +# buildpack compile but prior to new code being deployed: +# https://devcenter.heroku.com/articles/release-phase +# NB: Changes made to the filesystem will not be preserved (use `post_compile` instead). # Make non-zero exit codes & other errors fatal. set -euo pipefail diff --git a/docker-compose.yml b/docker-compose.yml index d0bae8739..99ad671de 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -27,10 +27,7 @@ services: - TREEHERDER_DEBUG=True - NEW_RELIC_INSIGHTS_API_KEY=${NEW_RELIC_INSIGHTS_API_KEY:-} - PROJECTS_TO_INGEST=${PROJECTS_TO_INGEST:-autoland,try} - # The default cert is for access to the stage replica; for accessing - # prototype this variable will need to reference deployment/gcp/ca-cert-prototype.pem. - # See treeherder docs for more details. - - TLS_CERT_PATH=${TLS_CERT_PATH:-deployment/gcp/ca-cert.pem} + - TLS_CERT_PATH=${TLS_CERT_PATH} entrypoint: './docker/entrypoint.sh' # We *ONLY* initialize the data when we're running the backend command: './initialize_data.sh ./manage.py runserver 0.0.0.0:8000' diff --git a/docker/Dockerfile b/docker/Dockerfile index cfacc4d2d..f60b43a3a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -39,12 +39,5 @@ COPY --from=frontend /app/.build/ /app/.build/ RUN python manage.py collectstatic --noinput -# Generate gzipped versions of files that would benefit from compression, that -# WhiteNoise can then serve in preference to the originals. 
This is required -# since WhiteNoise's Django storage backend only gzips assets handled by -# collectstatic, and so does not affect files in the `.build/` directory -# since they are instead generated by Neutrino/webpack. -RUN python -m whitenoise.compress .build - ENTRYPOINT ["/bin/bash", "/app/docker/entrypoint_prod.sh"] CMD ["web"] diff --git a/docker/dev.Dockerfile b/docker/dev.Dockerfile index 3c6db9145..827a8d182 100644 --- a/docker/dev.Dockerfile +++ b/docker/dev.Dockerfile @@ -1,3 +1,4 @@ +# Use the same Python version as in Heroku (see runtime.txt) FROM python:3.7.11 # Variables that are not specific to a particular environment. @@ -26,5 +27,6 @@ ADD . /app WORKDIR /app # Common and dev deps installed separately to prove that common.txt works standalone +# (given that dev.txt is not installed on Heroku) RUN pip install --no-cache-dir --disable-pip-version-check --require-hashes -r requirements/common.txt RUN pip install --no-cache-dir --disable-pip-version-check --require-hashes -r requirements/dev.txt diff --git a/docker/entrypoint_prod.sh b/docker/entrypoint_prod.sh index 181978be8..39623a4d2 100755 --- a/docker/entrypoint_prod.sh +++ b/docker/entrypoint_prod.sh @@ -2,19 +2,32 @@ # The `release` process type specifies the command to run during deployment, and is where # we run DB migrations and other tasks that are 'release' rather than 'build' specific: +# https://devcenter.heroku.com/articles/release-phase +# https://devcenter.heroku.com/articles/runtime-principles#build-release-run if [ "$1" == "release" ]; then exec ./bin/pre_deploy +# The `web` process type is the only one that receives external traffic from Heroku's routers. # We set the maximum request duration to 20 seconds, to ensure that poorly performing API -# queries do not consume a gunicorn worker for unbounded lengths of time. +# queries do not consume a gunicorn worker for unbounded lengths of time. See: +# https://devcenter.heroku.com/articles/python-gunicorn +# The Heroku Python buildpack sets some sensible gunicorn defaults via environment variables: +# https://github.com/heroku/heroku-buildpack-python/blob/master/vendor/python.gunicorn.sh +# https://github.com/heroku/heroku-buildpack-python/blob/master/vendor/WEB_CONCURRENCY.sh +# TODO: Experiment with different dyno sizes and gunicorn concurrency/worker types (bug 1175472). elif [ "$1" == "web" ]; then exec newrelic-admin run-program gunicorn treeherder.config.wsgi:application --timeout 20 --bind 0.0.0.0 # All other process types can have arbitrary names. # The Celery options such as `--without-heartbeat` are from the recommendations here: # https://www.cloudamqp.com/docs/celery.html +# The REMAP_SIGTERM is as recommended by: +# https://devcenter.heroku.com/articles/celery-heroku#using-remap_sigterm # This schedules (but does not run itself) the cron-like tasks listed in `CELERY_BEAT_SCHEDULE`. +# However we're moving away from using this in favour of the Heroku scheduler addon. +# NB: This should not be scaled up to more than 1 dyno otherwise duplicate tasks will be scheduled. +# TODO: Move the remaining tasks to the addon and remove this process type (deps of bug 1176492). 
elif [ "$1" == "celery_scheduler" ]; then export REMAP_SIGTERM=SIGQUIT exec newrelic-admin run-program celery beat -A treeherder diff --git a/lints/queuelint.py b/lints/queuelint.py new file mode 100755 index 000000000..a134be3fa --- /dev/null +++ b/lints/queuelint.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python + +import ast +import re +import sys + +procfile_re = re.compile(r"worker_\w*: .* -Q ([^ ]*)") + +procfile_queues = [] +with open("Procfile") as f: + for line in f: + m = procfile_re.match(line) + if m: + procfile_queues.extend(m.group(1).split(",")) + + +code = ast.parse(open("treeherder/config/settings.py").read()) + +settings_queues = set() + +queues_list = None +for item in code.body: + if isinstance(item, ast.Assign) and item.targets[0].id == "CELERY_TASK_QUEUES": + queues_list = item.value + +if queues_list is None: + print("Failed to find list of queues in settings file") + sys.exit(1) + +for call in queues_list.elts: + settings_queues.add(call.args[0].s) + +procfile_queues = set(procfile_queues) + +if settings_queues != procfile_queues: + print("ERROR - mismatches found") + missing_procfile = procfile_queues - settings_queues + if missing_procfile: + print( + "The following queues were in the Procfile, but not in the settings file:\n%s\n" + % "\n".join(missing_procfile) + ) + missing_settings = settings_queues - procfile_queues + if missing_settings: + print( + "The following queues were in the settings, but not in the Procfile:\n%s\n" + % "\n".join(missing_settings) + ) + sys.exit(1) diff --git a/misc/compare_tasks.py b/misc/compare_tasks.py index 973970b0e..f44fe9088 100755 --- a/misc/compare_tasks.py +++ b/misc/compare_tasks.py @@ -54,7 +54,7 @@ def print_url_to_taskcluster(job_guid): (decoded_task_id, _) = job_guid.split("/") # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3. taskId = slugid.encode(uuid.UUID(decoded_task_id)) - logger.info("https://firefox-ci-tc.services.mozilla.com/tasks/%s", taskId) + logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", taskId) if __name__ == "__main__": diff --git a/package.json b/package.json index 4eb9d6848..c52c8ce78 100644 --- a/package.json +++ b/package.json @@ -101,6 +101,7 @@ "codecov": "rm -rf coverage && node ./node_modules/codecov/bin/codecov", "format": "node ./node_modules/prettier/bin-prettier.js --write \"**/*.{css,html,js,jsx,json,md,yaml,yml}\"", "format:check": "node ./node_modules/prettier/bin-prettier.js --check \"**/*.{css,html,js,jsx,json,md,yaml,yml}\"", + "heroku-postbuild": "yarn build", "lint": "node ./node_modules/eslint/bin/eslint.js --report-unused-disable-directives --max-warnings 0 --format codeframe --ext js,jsx \".*.js\" \"*.js\" ui/ tests/ui/", "lint-with-cache": "node ./node_modules/eslint/bin/eslint.js --cache --report-unused-disable-directives --max-warnings 0 --format codeframe --ext js,jsx \".*.js\" \"*.js\" ui/ tests/ui/", "markdownlint": "npx markdownlint-cli -c .markdownlint.json -p .markdownlintignore .", diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..80a5b1da5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# Our pip requirements files are kept under `requirements/`, however Heroku's +# Python buildpack only looks for `/requirements.txt`, so we use +# pip's include syntax to load the production dependencies file from here. 
+ +-r requirements/common.txt diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 000000000..ffc86033c --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.7.10 diff --git a/tests/model/cycle_data/test_perfherder_cycling.py b/tests/model/cycle_data/test_perfherder_cycling.py index 7a8264407..d9ea66ac9 100644 --- a/tests/model/cycle_data/test_perfherder_cycling.py +++ b/tests/model/cycle_data/test_perfherder_cycling.py @@ -777,9 +777,38 @@ def test_try_data_removal_errors_out_on_missing_try_data(try_repository): _ = try_removal_strategy.target_signatures +@patch('treeherder.config.settings.SITE_HOSTNAME', 'treeherder-prototype2.herokuapp.com') +@pytest.mark.parametrize('days', [None, 5, 30, 100]) +def test_explicit_days_validation_on_treeherder_prototype2_environment(days): + try: + _ = PerfherderCycler(10_000, 0, days=days) + except ValueError: + pytest.fail() + + try: + _ = MainRemovalStrategy(10_000, days=days) + except ValueError: + pytest.fail() + + try: + _ = TryDataRemoval(10_000, days=days) + except ValueError: + pytest.fail() + + try: + _ = IrrelevantDataRemoval(10_000, days=days) + except ValueError: + pytest.fail() + + try: + _ = StalledDataRemoval(10_000, days=days) + except ValueError: + pytest.fail() + + @patch('treeherder.config.settings.SITE_HOSTNAME', 'treeherder-production.com') @pytest.mark.parametrize('days', [5, 30, 100, 364]) -def test_explicit_days_validation_on_all_envs(days): +def test_explicit_days_validation_on_envs_other_than_treeherder_prototype2(days): with pytest.raises(ValueError): _ = PerfherderCycler(10_000, 0, days=days) diff --git a/tests/test_setup.py b/tests/test_setup.py index 2198ae7bb..2b8c86935 100644 --- a/tests/test_setup.py +++ b/tests/test_setup.py @@ -4,6 +4,7 @@ from celery import current_app from django.core.cache import cache from django.core.management import call_command +from treeherder.config.utils import get_tls_redis_url from treeherder.utils.http import fetch_text @@ -33,6 +34,18 @@ def test_django_cache(): assert cache.get(k) == v +def test_get_tls_redis_url(): + """ + Test conversion from REDIS_URL to the stunnel TLS URL described here: + https://devcenter.heroku.com/articles/securing-heroku-redis#connecting-directly-to-stunnel + """ + REDIS_URL = 'redis://h:abc8069@ec2-12-34-56-78.compute-1.amazonaws.com:8069' + TLS_REDIS_URL = ( + 'rediss://h:abc8069@ec2-12-34-56-78.compute-1.amazonaws.com:8070?ssl_cert_reqs=none' + ) + assert get_tls_redis_url(REDIS_URL) == TLS_REDIS_URL + + @current_app.task def add(x, y): return x + y diff --git a/tox.ini b/tox.ini index 8e0e381bf..4518625e0 100644 --- a/tox.ini +++ b/tox.ini @@ -28,6 +28,7 @@ commands_pre = pip install -r {toxinidir}/requirements/common.txt commands = pip check + {toxinidir}/lints/queuelint.py # This ensures that we don't loose the ability of installing locally pre-commit checks pre-commit run --all-files commands_post = @@ -40,6 +41,29 @@ commands = mkdocs build commands_post = +# This build is configured to catch issues on PRs that would only be detected as +# part of Heroku's build step when the code has already been merged to master. 
+# The step ./bin/post_compile requires the output of `yarn build`, thus, we need +# to build both the JS and Python builds +# In Heroku, the JS dev dependencies are *not* installed because the env variable +# YARN_PRODUCTION is detected by the buildpack, however, yarn does not care about +# that and needs an explicit flag (`--prod`) +# https://devcenter.heroku.com/articles/nodejs-support#only-installing-dependencies +# NODE_ENV=production is implicit as part of the command `yarn build` with `--mode production` +[testenv:heroku] +whitelist_externals = + yarn + post_compile +commands_pre = + pip install -r requirements.txt + yarn install --prod +commands = + yarn heroku-postbuild + ./manage.py collectstatic --noinput + # This generates the revision and does the Brotly/Gzip compression + {toxinidir}/./bin/post_compile +commands_post = + [testenv:docker] whitelist_externals= docker-compose diff --git a/treeherder/config/settings.py b/treeherder/config/settings.py index 274586b0f..073d1fc64 100644 --- a/treeherder/config/settings.py +++ b/treeherder/config/settings.py @@ -8,7 +8,7 @@ import environ from furl import furl from kombu import Exchange, Queue -from treeherder.config.utils import connection_should_use_tls +from treeherder.config.utils import connection_should_use_tls, get_tls_redis_url # TODO: Switch to pathlib once using Python 3. SRC_DIR = dirname(dirname(dirname(abspath(__file__)))) @@ -82,7 +82,7 @@ INSTALLED_APPS = [ 'treeherder.changelog', ] -# Docker/outside-of-Docker/CircleCI +# Docker/outside-of-Docker/CircleCI vs Heroku/Review-app if DEBUG: NEW_RELIC_DEVELOPER_MODE = True # This controls whether the Django debug toolbar should be shown or not @@ -94,6 +94,12 @@ if DEBUG: INSTALLED_APPS.append('debug_toolbar') INSTALLED_APPS.append('django_extensions') +# Heroku-review-app (defined in app.json) +if env("HEROKU_REVIEW_APP", default=False): + SITE_URL = "https://{}.herokuapp.com".format(env("HEROKU_APP_NAME")) + SITE_HOSTNAME = furl(SITE_URL).host + ALLOWED_HOSTS = [SITE_HOSTNAME] + # Middleware MIDDLEWARE = [ middleware @@ -105,7 +111,7 @@ MIDDLEWARE = [ 'django.middleware.clickjacking.XFrameOptionsMiddleware', 'corsheaders.middleware.CorsMiddleware', # Allows both Django static files and those specified via `WHITENOISE_ROOT` - # to be served by WhiteNoise. + # to be served by WhiteNoise, avoiding the need for Apache/nginx on Heroku. 'treeherder.middleware.CustomWhiteNoise', 'django.middleware.gzip.GZipMiddleware', 'debug_toolbar.middleware.DebugToolbarMiddleware' if DEBUG else False, @@ -152,14 +158,19 @@ for alias in DATABASES: # prevent data loss (either STRICT_TRANS_TABLES or STRICT_ALL_TABLES). 'init_command': "SET sql_mode='STRICT_TRANS_TABLES'", } - # The stage replica cert is the default cert set in the docker-compose.yml file. - # Additonal certs for local development are in the deployment directory. - DATABASES[alias]['OPTIONS']['ssl'] = { - 'ca': env("TLS_CERT_PATH", default=None), - } + if connection_should_use_tls(DATABASES[alias]['HOST']): + # The default cert is for access to the stage replica; for accessing + # prototype this variable will need to reference deployment/gcp/ca-cert-prototype.pem. + # See treeherder docs for more details. + DATABASES[alias]['OPTIONS']['ssl'] = { + 'ca': env("TLS_CERT_PATH", default='deployment/gcp/ca-cert.pem'), + } # Caches REDIS_URL = env('REDIS_URL', default='redis://localhost:6379') +if connection_should_use_tls(REDIS_URL): + # Connect using TLS on Heroku. 
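+    # For illustration (values are made up): get_tls_redis_url() turns
+    # 'redis://h:secret@ec2-12-34-56-78.compute-1.amazonaws.com:8069' into
+    # 'rediss://h:secret@ec2-12-34-56-78.compute-1.amazonaws.com:8070?ssl_cert_reqs=none',
+    # i.e. the stunnel port one above the Redis port, with cert validation disabled.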
+ REDIS_URL = get_tls_redis_url(REDIS_URL) CACHES = { 'default': { @@ -347,6 +358,7 @@ CELERY_TASK_DEFAULT_QUEUE = 'default' # Make Celery defer the acknowledgment of a task until after the task has completed, # to prevent data loss in the case of celery master process crashes or infra failures. +# https://devcenter.heroku.com/articles/celery-heroku#using-acks_late # http://docs.celeryproject.org/en/latest/userguide/tasks.html#Task.acks_late CELERY_TASK_ACKS_LATE = True diff --git a/treeherder/config/utils.py b/treeherder/config/utils.py index d4183b6e2..6766fe404 100644 --- a/treeherder/config/utils.py +++ b/treeherder/config/utils.py @@ -2,6 +2,34 @@ from furl import furl def connection_should_use_tls(url): - # Ensure use of celery workers for local development + # Services such as RabbitMQ/MySQL running on Travis do not yet have TLS + # certificates set up. We could try using TLS locally using self-signed certs, + # but until Travis has support it's not overly useful. host = furl(url).host or url # The url passed is already just the hostname. - return host not in ('127.0.0.1', 'localhost', 'mysql', 'rabbitmq') + return host not in ('127.0.0.1', 'localhost', 'mysql', 'redis', 'rabbitmq') + + +def get_tls_redis_url(redis_url): + """ + Returns the TLS version of a Heroku REDIS_URL string. + + Whilst Redis server (like memcached) doesn't natively support TLS, Heroku runs an stunnel + daemon on their Redis instances, which can be connected to directly by Redis clients that + support TLS (avoiding the need for stunnel on the client). The stunnel port is one higher + than the Redis server port, and the informal `rediss://` scheme used to instruct clients + to wrap the connection with TLS. + + Will convert 'redis://h:PASSWORD@INSTANCE.compute-1.amazonaws.com:8409' + ...to: 'rediss://h:PASSWORD@INSTANCE.compute-1.amazonaws.com:8410?ssl_cert_reqs=none' + + See: + https://devcenter.heroku.com/articles/securing-heroku-redis#connecting-directly-to-stunnel + """ + url = furl(redis_url) + url.port += 1 + url.scheme += 's' + # Disable TLS certificate validation (restoring the behaviour of the older redis-py 2.x), + # since for now Heroku Redis uses self-signed certificates: + # https://bugzilla.mozilla.org/show_bug.cgi?id=1510000 + url.args['ssl_cert_reqs'] = 'none' + return str(url) diff --git a/treeherder/etl/management/commands/heroku_environment.py b/treeherder/etl/management/commands/heroku_environment.py new file mode 100755 index 000000000..68ca09f5e --- /dev/null +++ b/treeherder/etl/management/commands/heroku_environment.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +""" Script to validate each Heroku app as part of the release step or from local environment + +Locally an engineer with access to Heroku can use this script because we can authenticate +with the Heroku CLI and fetch the env variables. For now you will need to install the Heroku CLI +(https://devcenter.heroku.com/articles/heroku-cli) rather than figurying out how to authenticate +using the APIs (https://devcenter.heroku.com/articles/platform-api-quickstart). + +In order to authenticate your machine first call `heroku auth:login` from the command. 
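+
+For example (hypothetical invocation, assuming Heroku CLI access to the app):
+
+    ./manage.py heroku_environment --app treeherder-stage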
+ +Plan: + +* Add more values to MUST_BE_SET and MUST_BE_SET_TO +* Providing no --app-name validates all 5 Heroku apps +* Count that the number of environment variables is not larger than what expected +* See if we can define MUST_BE_SET variables in treeherder/config/settings.py and import it here +* Document usage +* Add support for validation in the context of bin/pre_deploy + * Do not fetch config vars but check environment directly +* This code could be moved into a Pypi package and permit other companies to use it + +Once this script is further along we can call it from ./bin/pre_deploy which is part of the +release step for Heroku apps. Failing to pass the script would prevent an app to be deployed +if it has missing or incorrect values. The user will then have to add the environment variable +to the failing Heroku app and retry the release step. + +Ideally we would catch Heroku app misconfigurations in Travis, however, that would require +fetching env variables from Travis and risk leakage. If we wanted to we could use Taskcluster +to run a task for us on every commit when treeherder/config/settings.py is touched. We should +make sure that logs from the task are only viewable by certain members with scopes. Without this +restriction the idea is a no-go since we could inadvertadely leak secrets. +""" +import logging +import sys + +import requests +from django.core.management.base import BaseCommand + +from treeherder.utils.http import make_request + +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +# These are environment variables that need to be set +MUST_BE_SET = {"treeherder-stage": ["NEW_RELIC_INSIGHTS_API_KEY", "THIS_ONE_WILL_FAIL"]} + +# These are environment variables that need to be set to a specific value +MUST_BE_SET_TO = { + "treeherder-stage": { + "BUGZILLA_API_URL": "https://bugzilla.mozilla.org", + "THIS_ONE_WILL_FAIL": "foo", + } +} + + +def request(path="", method="GET"): + return make_request( + "https://api.heroku.com/apps/{}".format(path), + method=method, + headers={'Accept': 'application/vnd.heroku+json; version=3'}, + ) + + +def authenticate(): + try: + request(method="POST") + except requests.exceptions.HTTPError as error: + if error.response.status_code == 401: + logger.critical( + "Run heroku auth:login to authenticate the terminal before calling this script." 
+ ) + sys.exit(-1) + + +def get_config_vars(app_name): + response = request("{}/config-vars".format(app_name)) + return response.json() + + +class Command(BaseCommand): + """Management command to validate Heroku environment variables.""" + + def add_arguments(self, parser): + parser.add_argument("--app", help="Heroku app name") + + def handle(self, *args, **options): + app_name = options["app"] + assert app_name in ["treeherder-stage"], "Choose a valid Heroku app name with --app" + authenticate() + # NOTE: Be councious that the secrets contained in here are only accessible + # if you have Heroku access to the apps OR that executing inside bin/pre_deploy + # also requires Heroku access to view + config_vars = get_config_vars(app_name) + + errors = False + + # Validate that these are set + for key in MUST_BE_SET[app_name]: + try: + config_vars[key] + except KeyError: + logger.error("{} must be set.".format(key)) + errors = True + + # Validate that it is set to a specific value + for key, value in MUST_BE_SET_TO[app_name].items(): + try: + if config_vars[key] != value: + errors = True + + except KeyError: + logger.error("{} must be set to this value: {}.".format(key, value)) + errors = True + + if errors: + sys.exit(-1) diff --git a/treeherder/middleware.py b/treeherder/middleware.py index 7866e38b4..b061595c5 100644 --- a/treeherder/middleware.py +++ b/treeherder/middleware.py @@ -19,7 +19,7 @@ CSP_DIRECTIVES = [ "font-src 'self' https://fonts.gstatic.com", # The `data:` is required for images that were inlined by webpack's url-loader (as an optimisation). "img-src 'self' data:", - "connect-src 'self' https://community-tc.services.mozilla.com https://firefox-ci-tc.services.mozilla.com https://*.taskcluster-artifacts.net https://taskcluster-artifacts.net https://treestatus.mozilla-releng.net https://bugzilla.mozilla.org https://auth.mozilla.auth0.com https://stage.taskcluster.nonprod.cloudops.mozgcp.net https://artifacts.tcstage.mozaws.net https://*.artifacts.tcstage.mozaws.net https://insights-api.newrelic.com https://prototype.treeherder.nonprod.cloudops.mozgcp.net https://treeherder.allizom.org", + "connect-src 'self' https://community-tc.services.mozilla.com https://firefox-ci-tc.services.mozilla.com https://*.taskcluster-artifacts.net https://taskcluster-artifacts.net https://treestatus.mozilla-releng.net https://bugzilla.mozilla.org https://auth.mozilla.auth0.com https://stage.taskcluster.nonprod.cloudops.mozgcp.net/ https://artifacts.tcstage.mozaws.net/ https://*.artifacts.tcstage.mozaws.net/ https://insights-api.newrelic.com", # Required since auth0-js performs session renewals in an iframe. 
"frame-src 'self' https://auth.mozilla.auth0.com", "report-uri {}".format(reverse('csp-report')), diff --git a/treeherder/model/data_cycling/utils.py b/treeherder/model/data_cycling/utils.py index bdbd2dedd..973e7f54d 100644 --- a/treeherder/model/data_cycling/utils.py +++ b/treeherder/model/data_cycling/utils.py @@ -1,8 +1,13 @@ +from treeherder.config import settings + + def has_valid_explicit_days(func): def wrapper(*args, **kwargs): days = kwargs.get('days') - if days is not None: - raise ValueError('Cannot override performance data retention parameters.') + if (days is not None) and settings.SITE_HOSTNAME != 'treeherder-prototype2.herokuapp.com': + raise ValueError( + 'Cannot override perf data retention parameters on projects other than treeherder-prototype2' + ) func(*args, **kwargs) return wrapper diff --git a/treeherder/model/management/commands/cycle_data.py b/treeherder/model/management/commands/cycle_data.py index dbf74a86c..b34b94d0c 100644 --- a/treeherder/model/management/commands/cycle_data.py +++ b/treeherder/model/management/commands/cycle_data.py @@ -32,7 +32,11 @@ class Command(BaseCommand): action='store', dest='days', type=int, - help=("Data cycle interval expressed in days. This only applies to Treeherder"), + help=( + "Data cycle interval expressed in days. " + "On Perfherder specifically, this only applies for `treeherder-prototype2` " + "environment; supplying it for other environments is illegal." + ), ) parser.add_argument( '--chunk-size', diff --git a/ui/taskcluster-auth-callback/constants.js b/ui/taskcluster-auth-callback/constants.js index a5bf24224..5a4218eb0 100644 --- a/ui/taskcluster-auth-callback/constants.js +++ b/ui/taskcluster-auth-callback/constants.js @@ -3,11 +3,11 @@ import { tcAuthCallbackUrl } from '../helpers/url'; export const tcClientIdMap = { 'https://treeherder.mozilla.org': 'production', 'https://treeherder.allizom.org': 'stage', - 'https://prototype.treeherder.nonprod.cloudops.mozgcp.net/': 'dev', + 'https://treeherder-prototype.herokuapp.com': 'dev', 'http://localhost:5000': 'localhost-5000', 'http://localhost:8000': 'localhost-8000', - 'https://tc-staging.treeherder.nonprod.cloudops.mozgcp.net/': - 'taskcluster-staging', + 'https://treeherder-taskcluster-staging.herokuapp.com': 'taskcluster-staging', + 'https://treeherder-prototype2.herokuapp.com': 'dev2', }; export const clientId = `treeherder-${