From df8c6f79f1112e5d50053faadcd6a8ae66fafb35 Mon Sep 17 00:00:00 2001 From: Barret Rennie Date: Tue, 3 Sep 2019 22:05:26 +0000 Subject: [PATCH] Bug 1563090 - Add a visual metrics treeherder task r=nalexander,tomprince This new task fetches the visualmetrics.py script from the github.com/mozilla/browsertime repository and runs it in parallel for the specified jobs. Jobs are specified in a JSON blob passed through to the task in an environment variable. A follow up patch specifies a command line argument to make this configuration available to `./mach try {fuzzy|chooser}` Differential Revision: https://phabricator.services.mozilla.com/D41052 --HG-- extra : moz-landing-system : lando --- taskcluster/ci/config.yml | 1 + taskcluster/ci/docker-image/kind.yml | 3 + taskcluster/ci/fetch/kind.yml | 1 + taskcluster/ci/fetch/visual-metrics.yml | 11 + taskcluster/ci/visual-metrics/kind.yml | 42 ++ taskcluster/docker/visual-metrics/Dockerfile | 23 ++ .../docker/visual-metrics/requirements.txt | 12 + .../visual-metrics/run-visual-metrics.py | 383 ++++++++++++++++++ taskcluster/docs/kinds.rst | 5 + tools/lint/py2.yml | 1 + 10 files changed, 482 insertions(+) create mode 100644 taskcluster/ci/fetch/visual-metrics.yml create mode 100644 taskcluster/ci/visual-metrics/kind.yml create mode 100644 taskcluster/docker/visual-metrics/Dockerfile create mode 100644 taskcluster/docker/visual-metrics/requirements.txt create mode 100644 taskcluster/docker/visual-metrics/run-visual-metrics.py diff --git a/taskcluster/ci/config.yml b/taskcluster/ci/config.yml index 1b892bce6daa..6103f39cd085 100755 --- a/taskcluster/ci/config.yml +++ b/taskcluster/ci/config.yml @@ -137,6 +137,7 @@ treeherder: 'java': 'Java checks' 'SS': 'Shadow scheduler' 'test-info': 'Test manifest skip/fail information' + 'vismet': 'Visual Metrics Analysis' index: products: diff --git a/taskcluster/ci/docker-image/kind.yml b/taskcluster/ci/docker-image/kind.yml index 478e1df72b1a..eba1659d6e62 100644 --- 
a/taskcluster/ci/docker-image/kind.yml +++ b/taskcluster/ci/docker-image/kind.yml @@ -233,3 +233,6 @@ jobs: webrender-updater: symbol: I(wrupdater) parent: debian9-base + visual-metrics: + symbol: I(visual-metrics) + parent: debian9-base diff --git a/taskcluster/ci/fetch/kind.yml b/taskcluster/ci/fetch/kind.yml index d179b2166ed4..564f09ae7501 100644 --- a/taskcluster/ci/fetch/kind.yml +++ b/taskcluster/ci/fetch/kind.yml @@ -14,3 +14,4 @@ jobs-from: - benchmarks.yml - toolchains.yml - chromium-fetch.yml + - visual-metrics.yml diff --git a/taskcluster/ci/fetch/visual-metrics.yml b/taskcluster/ci/fetch/visual-metrics.yml new file mode 100644 index 000000000000..bf2c9a14329a --- /dev/null +++ b/taskcluster/ci/fetch/visual-metrics.yml @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +--- +visual-metrics: + description: "Browsertime visual metrics analysis script" + fetch: + type: static-url + url: https://raw.githubusercontent.com/mozilla/browsertime/4745d29bb5f8cd60bebe5287d5ba4c996a9d0ae4/vendor/visualmetrics.py + sha256: 9e587fb43c46dd0c37a15bc7688cf061c82592196eafe3ddd54e2ead27997d66 + size: 82756 diff --git a/taskcluster/ci/visual-metrics/kind.yml b/taskcluster/ci/visual-metrics/kind.yml new file mode 100644 index 000000000000..e8233acf73a2 --- /dev/null +++ b/taskcluster/ci/visual-metrics/kind.yml @@ -0,0 +1,42 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+--- +loader: taskgraph.loader.transform:loader + +kind-dependencies: + - fetch + +transforms: + - taskgraph.transforms.job:transforms + - taskgraph.transforms.task:transforms + +jobs: + visual-metrics: + label: visual-metrics + description: "Run visual metrics calculations" + run-on-projects: ['try'] + worker-type: t-linux-xlarge + + treeherder: + symbol: vismet(visual-metrics) + platform: visualmetrics/opt + tier: 2 + kind: other + + worker: + docker-image: {in-tree: visual-metrics} + max-run-time: 9000 + artifacts: + - type: file + name: public/visual-metrics.tar.xz + path: /builds/worker/visual-metrics.tar.xz + + fetches: + fetch: + - visual-metrics + + run: + using: run-task + command: /builds/worker/bin/run-visual-metrics.py -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport + checkout: false diff --git a/taskcluster/docker/visual-metrics/Dockerfile b/taskcluster/docker/visual-metrics/Dockerfile new file mode 100644 index 000000000000..940cabb48cfd --- /dev/null +++ b/taskcluster/docker/visual-metrics/Dockerfile @@ -0,0 +1,23 @@ +# %ARG DOCKER_IMAGE_PARENT +FROM $DOCKER_IMAGE_PARENT +MAINTAINER Barret Rennie + +RUN apt-get update && \ + apt-get install \ + ffmpeg \ + imagemagick \ + pyssim \ + python \ + python-pil \ + python3 \ + python3-pip + +COPY requirements.txt /builds/worker/requirements.txt +RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \ + rm /builds/worker/requirements.txt + +COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py +RUN chmod +x /builds/worker/bin/run-visual-metrics.py + +VOLUME /builds/worker/workspace/ +VOLUME /builds/worker/artifacts/ diff --git a/taskcluster/docker/visual-metrics/requirements.txt b/taskcluster/docker/visual-metrics/requirements.txt new file mode 100644 index 000000000000..174b9c396eb8 --- /dev/null +++ b/taskcluster/docker/visual-metrics/requirements.txt @@ -0,0 +1,12 @@ +# Direct dependencies +attrs==19.1.0 
--hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79 +requests==2.22.0 --hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31 +structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536 +voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1 + +# Transitive dependencies +certifi==2019.6.16 --hash=sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939 +chardet==3.0.4 --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 +idna==2.8 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c +six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c +urllib3==1.25.3 --hash=sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1 diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py new file mode 100644 index 000000000000..243ee5c76a29 --- /dev/null +++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py @@ -0,0 +1,383 @@ +#!/usr/bin/env python3 +"""Instrument visualmetrics.py to run in parallel. + +Environment variables: + + VISUAL_METRICS_JOBS_JSON: + A JSON blob containing the job descriptions. + + Can be overridden with the --jobs-json-path option set to a local file + path. +""" + +import argparse +import os +import json +import sys +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor +from functools import partial +from multiprocessing import cpu_count +from pathlib import Path + +import attr +import requests +import structlog +import subprocess +from voluptuous import Required, Schema, Url + +#: The workspace directory where files will be downloaded, etc. +WORKSPACE_DIR = Path("/", "builds", "worker", "workspace") + +#: The directory where job artifacts will be stored. 
+WORKSPACE_JOBS_DIR = WORKSPACE_DIR / "jobs" + +#: The directory where artifacts from this job will be placed. +OUTPUT_DIR = Path("/", "builds", "worker", "artifacts") + +#: A job to process through visualmetrics.py +@attr.s +class Job: + #: The directory for all the files pertaining to the job. + job_dir = attr.ib(type=Path) + + #: json_path: The path to the ``browsertime.json`` file on disk. + json_path = attr.ib(type=Path) + + #: json_url: The URL of the ``browsertime.json`` file. + json_url = attr.ib(type=str) + + #: video_path: The path of the video file on disk. + video_path = attr.ib(type=Path) + + #: video_url: The URL of the video file. + video_url = attr.ib(type=str) + + +#: The schema for validating jobs. +JOB_SCHEMA = Schema( + { + Required("jobs"): [ + { + Required("browsertime_json_url"): Url(), + Required("video_url"): Url(), + } + ] + } +) + + +def main(log, args): + """Run visualmetrics.py in parallel. + + Args: + log: The structlog logger instance. + args: The parsed arguments from the argument parser. + + Returns: + The return code that the program will exit with. 
+ """ + fetch_dir = os.getenv("MOZ_FETCHES_DIR") + if not fetch_dir: + log.error("Expected MOZ_FETCHES_DIR environment variable.") + return 1 + + visualmetrics_path = Path(fetch_dir) / "visualmetrics.py" + if not visualmetrics_path.exists(): + log.error( + "Could not locate visualmetrics.py: expected it at %s" + % visualmetrics_path + ) + return 1 + + if args.jobs_json_path: + try: + with open(args.jobs_json_path, "r") as f: + jobs_json = json.load(f) + except Exception as e: + log.error( + "Could not read jobs.json file: %s" % e, + path=args.jobs_json_path, + exc_info=True, + ) + return 1 + + log.info( + "Loaded jobs.json from file", + path=args.jobs_json_path, + jobs_json=jobs_json, + ) + + else: + raw_jobs_json = os.getenv("VISUAL_METRICS_JOBS_JSON") + if raw_jobs_json is not None and isinstance(raw_jobs_json, bytes): + raw_jobs_json = raw_jobs_json.decode("utf-8") + elif raw_jobs_json is None: + log.error( + "Expected one of --jobs-json-path or " + "VISUAL_METRICS_JOBS_JSON environment variable." 
+ ) + return 1 + + try: + jobs_json = json.loads(raw_jobs_json) + except (TypeError, ValueError) as e: + log.error( + "Failed to decode VISUAL_METRICS_JOBS_JSON environment " + "variable: %s" % e, + value=raw_jobs_json, + ) + return 1 + + log.info("Parsed jobs.json from environment", jobs_json=jobs_json) + + try: + JOB_SCHEMA(jobs_json) + except Exception as e: + log.error("Failed to parse jobs.json: %s" % e) + return 1 + + try: + downloaded_jobs, failed_jobs = download_inputs(log, jobs_json["jobs"]) + except Exception as e: + log.error("Failed to download jobs: %s" % e, exc_info=True) + return 1 + + with ProcessPoolExecutor(max_workers=cpu_count()) as executor: + for job, result in zip( + downloaded_jobs, + executor.map( + partial( + run_visual_metrics, + visualmetrics_path=visualmetrics_path, + options=args.visual_metrics_options, + ), + downloaded_jobs, + ), + ): + if isinstance(result, Exception): + log.error( + "Failed to run visualmetrics.py", + video_url=job.video_url, + error=result, + ) + else: + with (job.job_dir / "visual-metrics.json").open("wb") as f: + f.write(result) + + OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + + with Path(WORKSPACE_DIR, "jobs.json").open("w") as f: + json.dump( + { + "successful_jobs": [ + { + "video_url": job.video_url, + "browsertime_json_url": job.json_url, + "path": ( + str(job.job_dir.relative_to(WORKSPACE_DIR)) + "/" + ), + } + for job in downloaded_jobs + ], + "failed_jobs": [ + { + "video_url": job.video_url, + "browsertime_json_url": job.json_url, + } + for job in failed_jobs + ], + }, + f, + ) + + subprocess.check_output( + [ + "tar", + "cJf", + str(OUTPUT_DIR / "visual-metrics.tar.xz"), + "-C", + str(WORKSPACE_DIR), + ".", + ] + ) + + +def download_inputs(log, raw_jobs): + """Download the inputs for all jobs in parallel. + + Args: + log: The structlog logger instance. + raw_jobs: The list of unprocessed jobs from the ``jobs.json`` input file. 
+ + Returns: + A tuple of the successfully downloaded jobs and the failed to download jobs. + """ + WORKSPACE_DIR.mkdir(parents=True, exist_ok=True) + + pending_jobs = [] + for i, job in enumerate(raw_jobs): + job_dir = WORKSPACE_JOBS_DIR / str(i) + job_dir.mkdir(parents=True, exist_ok=True) + + pending_jobs.append( + Job( + job_dir, + job_dir / "browsertime.json", + job["browsertime_json_url"], + job_dir / "video", + job["video_url"], + ) + ) + + downloaded_jobs = [] + failed_jobs = [] + + with ThreadPoolExecutor(max_workers=8) as executor: + for job, success in executor.map( + partial(download_job, log), pending_jobs + ): + if success: + downloaded_jobs.append(job) + else: + job.job_dir.rmdir() + failed_jobs.append(job) + + return downloaded_jobs, failed_jobs + + +def download_job(log, job): + """Download the files for a given job. + + Args: + log: The structlog logger instance. + job: The job to download. + + Returns: + A tuple of the job and whether or not the download was successful. + + The returned job will be updated so that its :attr:`Job.video_path` + attribute is updated to match the file path given by the video file + in the ``browsertime.json`` file. 
+ """ + log = log.bind(json_url=job.json_url) + try: + download(job.video_url, job.video_path) + download(job.json_url, job.json_path) + except Exception as e: + log.error( + "Failed to download files for job: %s" % e, + video_url=job.video_url, + exc_info=True, + ) + return job, False + + try: + with job.json_path.open("r") as f: + browsertime_json = json.load(f) + except OSError as e: + log.error("Could not read browsertime.json: %s" % e) + return job, False + except ValueError as e: + log.error("Could not parse browsertime.json as JSON: %s" % e) + return job, False + + try: + video_path = job.job_dir / browsertime_json[0]["files"]["video"][0] + except KeyError: + log.error("Could not read video path from browsertime.json file") + return job, False + + video_path.parent.mkdir(parents=True, exist_ok=True) + + job.video_path.rename(video_path) + job.video_path = video_path + + return job, True + + +def download(url, path): + """Download the resource at the given URL to the local path. + + Args: + url: The URL of the resource to download. + path: The local path to download the resource to. + + Raises: + OSError: + Raised if an IO error occurs while writing the file. + + requests.exceptions.HTTPError: + Raised when an HTTP error (including e.g., HTTP 404) occurs. + """ + request = requests.get(url, stream=True) + request.raise_for_status() + + path.parent.mkdir(parents=True, exist_ok=True) + + with path.open("wb") as f: + for chunk in request: + f.write(chunk) + + +def run_visual_metrics(job, visualmetrics_path, options): + """Run visualmetrics.py on the input job. + + Returns: + Either a string containing the JSON output of visualmetrics.py or an + exception raised by :func:`subprocess.check_output`. 
+ """ + cmd = [ + "/usr/bin/python", + str(visualmetrics_path), + "--video", + str(job.video_path), + ] + + cmd.extend(options) + + try: + return subprocess.check_output(cmd) + except subprocess.CalledProcessError as e: + return e + + +if __name__ == "__main__": + structlog.configure( + processors=[ + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.format_exc_info, + structlog.dev.ConsoleRenderer(colors=False), + ], + cache_logger_on_first_use=True, + ) + + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument( + "--jobs-json-path", + type=Path, + metavar="PATH", + help=( + "The path to the jobs.josn file. If not present, the " + "VISUAL_METRICS_JOBS_JSON environment variable will be used " + "instead." + ), + ) + parser.add_argument( + "visual_metrics_options", + type=str, + metavar="VISUAL-METRICS-OPTIONS", + help="Options to pass to visualmetrics.py", + nargs="*", + ) + + args = parser.parse_args() + log = structlog.get_logger() + + try: + sys.exit(main(log, args)) + except Exception as e: + log.error("Unhandled exception: %s" % e, exc_info=True) + sys.exit(1) diff --git a/taskcluster/docs/kinds.rst b/taskcluster/docs/kinds.rst index db0d4fcf7ecf..825fa673bdd1 100644 --- a/taskcluster/docs/kinds.rst +++ b/taskcluster/docs/kinds.rst @@ -598,3 +598,8 @@ test archive into it's own archive. geckodriver-signing ------------------- Signing for geckodriver binary. + +visual-metrics +-------------- +Tasks that compute visual performance metrics from videos and images captured +by other tasks. diff --git a/tools/lint/py2.yml b/tools/lint/py2.yml index 24635f1fd402..1b241538390a 100644 --- a/tools/lint/py2.yml +++ b/tools/lint/py2.yml @@ -19,6 +19,7 @@ py2: - security - servo - taskcluster/docker/funsize-update-generator + - taskcluster/docker/visual-metrics - testing/gtest - testing/mochitest - testing/mozharness