Bug 1563090 - Add a visual metrics treeherder task r=nalexander,tomprince

This new task fetches the visualmetrics.py script from the github.com/mozilla/browsertime repository and runs it in parallel for the specified jobs. Jobs are specified in a JSON blob passed through to the task in an environment variable. A follow up patch specifies a command line argument to make this configuration available to `./mach try {fuzzy|chooser}` Differential Revision: https://phabricator.services.mozilla.com/D41052 --HG-- extra : moz-landing-system : lando
2019-09-03 22:05:26 +00:00 · 2019-09-03 22:05:26 +00:00 · df8c6f79f1
--- a/taskcluster/ci/config.yml
+++ b/taskcluster/ci/config.yml
@ -137,6 +137,7 @@ treeherder:
        'java': 'Java checks'
        'SS': 'Shadow scheduler'
        'test-info': 'Test manifest skip/fail information'
+        'vismet': 'Visual Metrics Analysis'

 index:
    products:
--- a/taskcluster/ci/docker-image/kind.yml
+++ b/taskcluster/ci/docker-image/kind.yml
@ -233,3 +233,6 @@ jobs:
    webrender-updater:
        symbol: I(wrupdater)
        parent: debian9-base
+    visual-metrics:
+        symbol: I(visual-metrics)
+        parent: debian9-base
--- a/taskcluster/ci/fetch/kind.yml
+++ b/taskcluster/ci/fetch/kind.yml
@ -14,3 +14,4 @@ jobs-from:
    - benchmarks.yml
    - toolchains.yml
    - chromium-fetch.yml
+    - visual-metrics.yml
--- a/taskcluster/ci/fetch/visual-metrics.yml
+++ b/taskcluster/ci/fetch/visual-metrics.yml
@ -0,0 +1,11 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+---
+visual-metrics:
+    description: "Browsertime visual metrics analysis script"
+    fetch:
+        type: static-url
+        url: https://raw.githubusercontent.com/mozilla/browsertime/4745d29bb5f8cd60bebe5287d5ba4c996a9d0ae4/vendor/visualmetrics.py
+        sha256: 9e587fb43c46dd0c37a15bc7688cf061c82592196eafe3ddd54e2ead27997d66
+        size: 82756
--- a/taskcluster/ci/visual-metrics/kind.yml
+++ b/taskcluster/ci/visual-metrics/kind.yml
@ -0,0 +1,42 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+---
+loader: taskgraph.loader.transform:loader
+
+kind-dependencies:
+    - fetch
+
+transforms:
+    - taskgraph.transforms.job:transforms
+    - taskgraph.transforms.task:transforms
+
+jobs:
+    visual-metrics:
+        label: visual-metrics
+        description: "Run visual metrics calculations"
+        run-on-projects: ['try']
+        worker-type: t-linux-xlarge
+
+        treeherder:
+            symbol: vismet(visual-metrics)
+            platform: visualmetrics/opt
+            tier: 2
+            kind: other
+
+        worker:
+            docker-image: {in-tree: visual-metrics}
+            max-run-time: 9000
+            artifacts:
+                # run-visual-metrics.py writes its archive into its
+                # OUTPUT_DIR, /builds/worker/artifacts, so the artifact
+                # path must point there.
+                - type: file
+                  name: public/visual-metrics.tar.xz
+                  path: /builds/worker/artifacts/visual-metrics.tar.xz
+
+        fetches:
+            fetch:
+                - visual-metrics
+
+        run:
+            using: run-task
+            command: /builds/worker/bin/run-visual-metrics.py -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
+            checkout: false
--- a/taskcluster/docker/visual-metrics/Dockerfile
+++ b/taskcluster/docker/visual-metrics/Dockerfile
@ -0,0 +1,23 @@
+# %ARG DOCKER_IMAGE_PARENT
+FROM $DOCKER_IMAGE_PARENT
+MAINTAINER Barret Rennie <barret@mozilla.com>
+
+# run-visual-metrics.py runs under python3 (see its shebang) and launches
+# visualmetrics.py with /usr/bin/python, so both interpreters are installed.
+# NOTE(review): ffmpeg, imagemagick, pyssim and python-pil are presumably
+# runtime dependencies of visualmetrics.py -- confirm against that script.
+RUN apt-get update && \
+    apt-get install \
+      ffmpeg \
+      imagemagick \
+      pyssim \
+      python \
+      python-pil \
+      python3 \
+      python3-pip
+
+# Hash-pinned Python 3 packages imported by run-visual-metrics.py.
+COPY requirements.txt /builds/worker/requirements.txt
+RUN pip3 install --require-hashes -r /builds/worker/requirements.txt && \
+    rm /builds/worker/requirements.txt
+
+COPY run-visual-metrics.py /builds/worker/bin/run-visual-metrics.py
+RUN chmod +x /builds/worker/bin/run-visual-metrics.py
+
+# These match WORKSPACE_DIR and OUTPUT_DIR in run-visual-metrics.py.
+VOLUME /builds/worker/workspace/
+VOLUME /builds/worker/artifacts/
--- a/taskcluster/docker/visual-metrics/requirements.txt
+++ b/taskcluster/docker/visual-metrics/requirements.txt
@ -0,0 +1,12 @@
+# Direct dependencies
+attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79
+requests==2.22.0 --hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31
+structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536
+voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1
+
+# Transitive dependencies
+certifi==2019.6.16 --hash=sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939
+chardet==3.0.4 --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
+idna==2.8 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
+six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c
+urllib3==1.25.3 --hash=sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1
--- a/taskcluster/docker/visual-metrics/run-visual-metrics.py
+++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py
@ -0,0 +1,383 @@
+#!/usr/bin/env python3
+"""Instrument visualmetrics.py to run in parallel.
+
+Environment variables:
+
+  VISUAL_METRICS_JOBS_JSON:
+    A JSON blob containing the job descriptions.
+
+    Can be overridden with the --jobs-json-path option set to a local file
+    path.
+"""
+
+import argparse
+import os
+import json
+import sys
+from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
+from functools import partial
+from multiprocessing import cpu_count
+from pathlib import Path
+
+import attr
+import requests
+import structlog
+import subprocess
+from voluptuous import Required, Schema, Url
+
+#: Root directory for everything this script downloads or generates.
+WORKSPACE_DIR = Path("/builds/worker/workspace")
+
+#: Parent of the per-job directories that hold each job's input files.
+WORKSPACE_JOBS_DIR = WORKSPACE_DIR.joinpath("jobs")
+
+#: Directory that receives the artifacts produced by this task.
+OUTPUT_DIR = Path("/builds/worker/artifacts")
+
+#: A job to process through visualmetrics.py
+@attr.s
+class Job:
+    """A video / ``browsertime.json`` pair to run through visualmetrics.py.
+
+    Instances are constructed positionally by ``download_inputs``, so the
+    order of the attributes below is part of the interface.
+    """
+
+    #: The directory for all the files pertaining to the job.
+    job_dir = attr.ib(type=Path)
+
+    #: The path to the ``browsertime.json`` file on disk.
+    json_path = attr.ib(type=Path)
+
+    #: The URL of the ``browsertime.json`` file.
+    json_url = attr.ib(type=str)
+
+    #: The path of the video file on disk. ``download_job`` reassigns this
+    #: after moving the video to the path named in ``browsertime.json``.
+    video_path = attr.ib(type=Path)
+
+    #: The URL of the video file.
+    video_url = attr.ib(type=str)
+
+
+#: The schema for validating jobs.
+#:
+#: The input must be a mapping with a ``jobs`` key holding a list; every
+#: entry of that list must provide both a ``browsertime_json_url`` and a
+#: ``video_url``, each of which is validated as a URL.
+JOB_SCHEMA = Schema(
+    {
+        Required("jobs"): [
+            {
+                Required("browsertime_json_url"): Url(),
+                Required("video_url"): Url(),
+            }
+        ]
+    }
+)
+
+
+def main(log, args):
+    """Run visualmetrics.py in parallel.
+
+    The job list is read from ``--jobs-json-path`` when given, otherwise
+    from the ``VISUAL_METRICS_JOBS_JSON`` environment variable. The inputs of
+    every job are downloaded, visualmetrics.py is run once per downloaded
+    job, and the workspace (including a ``jobs.json`` summary) is archived
+    as ``visual-metrics.tar.xz`` in the output directory.
+
+    Args:
+        log: The structlog logger instance.
+        args: The parsed arguments from the argument parser.
+
+    Returns:
+        The return code that the program will exit with: 1 when the
+        visualmetrics.py script or the job list cannot be found, parsed,
+        validated or downloaded, 0 otherwise.
+
+    Raises:
+        subprocess.CalledProcessError:
+            Raised if creating the output archive fails.
+    """
+    fetch_dir = os.getenv("MOZ_FETCHES_DIR")
+    if not fetch_dir:
+        log.error("Expected MOZ_FETCHES_DIR environment variable.")
+        return 1
+
+    visualmetrics_path = Path(fetch_dir) / "visualmetrics.py"
+    if not visualmetrics_path.exists():
+        log.error(
+            "Could not locate visualmetrics.py: expected it at %s"
+            % visualmetrics_path
+        )
+        return 1
+
+    if args.jobs_json_path:
+        try:
+            # Use Path.open() like the rest of this file: the built-in
+            # open() only accepts path objects from Python 3.6 onwards.
+            with Path(args.jobs_json_path).open("r") as f:
+                jobs_json = json.load(f)
+        except Exception as e:
+            log.error(
+                "Could not read jobs.json file: %s" % e,
+                path=args.jobs_json_path,
+                exc_info=True,
+            )
+            return 1
+
+        log.info(
+            "Loaded jobs.json from file",
+            path=args.jobs_json_path,
+            jobs_json=jobs_json,
+        )
+
+    else:
+        # os.getenv() returns either a ``str`` or ``None`` on Python 3, so
+        # no bytes decoding is needed here.
+        raw_jobs_json = os.getenv("VISUAL_METRICS_JOBS_JSON")
+        if raw_jobs_json is None:
+            log.error(
+                "Expected one of --jobs-json-path or "
+                "VISUAL_METRICS_JOBS_JSON environment variable."
+            )
+            return 1
+
+        try:
+            jobs_json = json.loads(raw_jobs_json)
+        except (TypeError, ValueError) as e:
+            log.error(
+                "Failed to decode VISUAL_METRICS_JOBS_JSON environment "
+                "variable: %s" % e,
+                value=raw_jobs_json,
+            )
+            return 1
+
+        log.info("Parsed jobs.json from environment", jobs_json=jobs_json)
+
+    try:
+        JOB_SCHEMA(jobs_json)
+    except Exception as e:
+        log.error("Failed to parse jobs.json: %s" % e)
+        return 1
+
+    try:
+        downloaded_jobs, failed_jobs = download_inputs(log, jobs_json["jobs"])
+    except Exception as e:
+        log.error("Failed to download jobs: %s" % e, exc_info=True)
+        return 1
+
+    # Track the outcome of each run so that the jobs.json summary only
+    # lists a job as successful when visualmetrics.py produced output.
+    successful_jobs = []
+    unsuccessful_jobs = list(failed_jobs)
+
+    run_job = partial(
+        run_visual_metrics,
+        visualmetrics_path=visualmetrics_path,
+        options=args.visual_metrics_options,
+    )
+
+    with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
+        # executor.map() yields results in input order, so zip() pairs each
+        # result with the job that produced it.
+        for job, result in zip(
+            downloaded_jobs, executor.map(run_job, downloaded_jobs)
+        ):
+            if isinstance(result, Exception):
+                log.error(
+                    "Failed to run visualmetrics.py",
+                    video_url=job.video_url,
+                    error=result,
+                )
+                unsuccessful_jobs.append(job)
+            else:
+                with (job.job_dir / "visual-metrics.json").open("wb") as f:
+                    f.write(result)
+                successful_jobs.append(job)
+
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    with Path(WORKSPACE_DIR, "jobs.json").open("w") as f:
+        json.dump(
+            {
+                "successful_jobs": [
+                    {
+                        "video_url": job.video_url,
+                        "browsertime_json_url": job.json_url,
+                        "path": (
+                            str(job.job_dir.relative_to(WORKSPACE_DIR)) + "/"
+                        ),
+                    }
+                    for job in successful_jobs
+                ],
+                "failed_jobs": [
+                    {
+                        "video_url": job.video_url,
+                        "browsertime_json_url": job.json_url,
+                    }
+                    for job in unsuccessful_jobs
+                ],
+            },
+            f,
+        )
+
+    # The archive is written outside WORKSPACE_DIR, so it never contains
+    # itself.
+    subprocess.check_output(
+        [
+            "tar",
+            "cJf",
+            str(OUTPUT_DIR / "visual-metrics.tar.xz"),
+            "-C",
+            str(WORKSPACE_DIR),
+            ".",
+        ]
+    )
+
+    return 0
+
+
+def download_inputs(log, raw_jobs):
+    """Download the inputs for all jobs in parallel.
+
+    Each job gets its own numbered directory under the workspace jobs
+    directory. The directory of a job that fails to download is removed
+    again, together with any partially downloaded files.
+
+    Args:
+        log: The structlog logger instance.
+        raw_jobs: The list of unprocessed jobs from the ``jobs.json`` input file.
+
+    Returns:
+        A tuple of the successfully downloaded jobs and the failed to download jobs.
+    """
+    # Imported here so that this function carries its own dependency.
+    import shutil
+
+    WORKSPACE_DIR.mkdir(parents=True, exist_ok=True)
+
+    pending_jobs = []
+    for i, job in enumerate(raw_jobs):
+        job_dir = WORKSPACE_JOBS_DIR / str(i)
+        job_dir.mkdir(parents=True, exist_ok=True)
+
+        pending_jobs.append(
+            Job(
+                job_dir,
+                job_dir / "browsertime.json",
+                job["browsertime_json_url"],
+                job_dir / "video",
+                job["video_url"],
+            )
+        )
+
+    downloaded_jobs = []
+    failed_jobs = []
+
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        for job, success in executor.map(
+            partial(download_job, log), pending_jobs
+        ):
+            if success:
+                downloaded_jobs.append(job)
+            else:
+                # A failed job can leave downloaded files behind (e.g. the
+                # video when only browsertime.json failed), and Path.rmdir()
+                # raises on a non-empty directory, so remove the whole tree.
+                try:
+                    shutil.rmtree(str(job.job_dir))
+                except OSError as e:
+                    # Cleanup is best effort; one stale directory must not
+                    # abort the remaining jobs.
+                    log.warning(
+                        "Could not remove job directory: %s" % e,
+                        path=str(job.job_dir),
+                    )
+                failed_jobs.append(job)
+
+    return downloaded_jobs, failed_jobs
+
+
+def download_job(log, job):
+    """Download the files for a given job.
+
+    Args:
+        log: The structlog logger instance.
+        job: The job to download.
+
+    Returns:
+        A tuple of the job and whether or not the download was successful.
+
+        On success the downloaded video is moved to the path named by the
+        video entry of the ``browsertime.json`` file (relative to the job
+        directory) and the job's :attr:`Job.video_path` attribute is updated
+        to that path.
+    """
+    log = log.bind(json_url=job.json_url)
+    try:
+        download(job.video_url, job.video_path)
+        download(job.json_url, job.json_path)
+    except Exception as e:
+        log.error(
+            "Failed to download files for job: %s" % e,
+            video_url=job.video_url,
+            exc_info=True,
+        )
+        return job, False
+
+    try:
+        with job.json_path.open("r") as f:
+            browsertime_json = json.load(f)
+    except OSError as e:
+        log.error("Could not read browsertime.json: %s" % e)
+        return job, False
+    except ValueError as e:
+        log.error("Could not parse browsertime.json as JSON: %s" % e)
+        return job, False
+
+    try:
+        video_path = job.job_dir / browsertime_json[0]["files"]["video"][0]
+    except (KeyError, IndexError, TypeError):
+        # KeyError: a mapping lacks the expected key. IndexError: the
+        # top-level list or the video list is empty. TypeError: the document
+        # has a different shape or the entry is not a string. Each of these
+        # only fails this job rather than aborting every download.
+        log.error("Could not read video path from browsertime.json file")
+        return job, False
+
+    video_path.parent.mkdir(parents=True, exist_ok=True)
+
+    job.video_path.rename(video_path)
+    job.video_path = video_path
+
+    return job, True
+
+
+def download(url, path):
+    """Download the resource at the given URL to the local path.
+
+    The body is streamed to disk in chunks rather than loaded into memory,
+    and missing parent directories of ``path`` are created.
+
+    Args:
+        url: The URL of the resource to download.
+        path: The local path to download the resource to.
+
+    Raises:
+        OSError:
+            Raised if an IO error occurs while writing the file.
+
+        requests.exceptions.HTTPError:
+            Raised when an HTTP error (including e.g., HTTP 404) occurs.
+    """
+    # With stream=True the connection stays open until the body is consumed
+    # or the response is closed; the ``with`` block closes it on every path,
+    # including HTTP errors and failed writes.
+    with requests.get(url, stream=True) as response:
+        response.raise_for_status()
+
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        with path.open("wb") as f:
+            # Iterating the response object directly yields 128-byte
+            # chunks; request larger ones to cut down on tiny writes.
+            for chunk in response.iter_content(chunk_size=64 * 1024):
+                f.write(chunk)
+
+
+def run_visual_metrics(job, visualmetrics_path, options):
+    """Invoke visualmetrics.py on the video belonging to the given job.
+
+    Args:
+        job: The job whose ``video_path`` is passed via ``--video``.
+        visualmetrics_path: The location of the visualmetrics.py script.
+        options: Extra command-line arguments appended to the invocation.
+
+    Returns:
+       Either the raw bytes visualmetrics.py wrote to its standard output
+       or the exception raised by :func:`subprocess.check_output` when the
+       script exits with a non-zero status.
+    """
+    command = [
+        "/usr/bin/python",
+        str(visualmetrics_path),
+        "--video",
+        str(job.video_path),
+    ] + list(options)
+
+    try:
+        output = subprocess.check_output(command)
+    except subprocess.CalledProcessError as error:
+        # Returned rather than raised so that the caller can report the
+        # failure for this job and carry on with the others.
+        return error
+
+    return output
+
+
+# Script entry point: configure logging, parse the command line and exit
+# with the status returned by main().
+if __name__ == "__main__":
+    structlog.configure(
+        processors=[
+            structlog.processors.TimeStamper(fmt="iso"),
+            structlog.processors.format_exc_info,
+            structlog.dev.ConsoleRenderer(colors=False),
+        ],
+        cache_logger_on_first_use=True,
+    )
+
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+    parser.add_argument(
+        "--jobs-json-path",
+        type=Path,
+        metavar="PATH",
+        help=(
+            "The path to the jobs.json file. If not present, the "
+            "VISUAL_METRICS_JOBS_JSON environment variable will be used "
+            "instead."
+        ),
+    )
+    # Every remaining positional argument is forwarded to visualmetrics.py.
+    parser.add_argument(
+        "visual_metrics_options",
+        type=str,
+        metavar="VISUAL-METRICS-OPTIONS",
+        help="Options to pass to visualmetrics.py",
+        nargs="*",
+    )
+
+    args = parser.parse_args()
+    log = structlog.get_logger()
+
+    try:
+        sys.exit(main(log, args))
+    except Exception as e:
+        # sys.exit() raises SystemExit, which is not an Exception subclass,
+        # so only genuine failures inside main() end up here.
+        log.error("Unhandled exception: %s" % e, exc_info=True)
+        sys.exit(1)
--- a/taskcluster/docs/kinds.rst
+++ b/taskcluster/docs/kinds.rst
@ -598,3 +598,8 @@ test archive into it's own archive.
 geckodriver-signing
 -------------------
 Signing for geckodriver binary.
+
+visual-metrics
+--------------
+Tasks that compute visual performance metrics from videos and images captured
+by other tasks.
--- a/tools/lint/py2.yml
+++ b/tools/lint/py2.yml
@ -19,6 +19,7 @@ py2:
        - security
        - servo
        - taskcluster/docker/funsize-update-generator
+        - taskcluster/docker/visual-metrics
        - testing/gtest
        - testing/mochitest
        - testing/mozharness