From 9705cbd3c73856c2de8e34e9b42d77c981235e0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tarek=20Ziad=C3=A9?= <tarek@mozilla.com>
Date: Tue, 26 Nov 2019 18:19:23 +0000
Subject: [PATCH] Bug 1595789 - zip browsertime results r=rwood,sparky,barret

Instead of using a json file that contains URLs to task cluster artifacts, let's use a single zip file that contains all video files to analyze. This will reduce the number of artifacts we generate per raptor run and the number of network downloads.

The run-visual-metrics gets the tarball directly when it's used as a sub-task and produces its own result tarball

notice that at this point I don't see any reason to copy in the vismet results tarball all the files.

Maybe we should simply generate a *single* json file that contains a merge of all visualmetrics results?
But that could be done later. lmk

Differential Revision: https://phabricator.services.mozilla.com/D52907

--HG--
extra : moz-landing-system : lando
---
 taskcluster/ci/test/raptor.yml                |   2 +-
 taskcluster/ci/visual-metrics-dep/kind.yml    |   2 +-
 .../visual-metrics/run-visual-metrics.py      | 158 +++++++++---------
 taskcluster/taskgraph/decision.py             |   6 +-
 .../transforms/visual_metrics_dep.py          |   2 +-
 taskcluster/taskgraph/util/schema.py          |   2 +-
 testing/raptor/raptor/raptor.py               |  11 ++
 testing/raptor/raptor/results.py              |  50 +++---
 8 files changed, 128 insertions(+), 105 deletions(-)

diff --git a/taskcluster/ci/test/raptor.yml b/taskcluster/ci/test/raptor.yml
index 3db66f497551..c469907e7349 100644
--- a/taskcluster/ci/test/raptor.yml
+++ b/taskcluster/ci/test/raptor.yml
@@ -1041,7 +1041,7 @@ browsertime-tp6-1:
     run-visual-metrics:
         by-app:
             chrome: false
-            default: false
+            default: true
     mozharness:
         extra-options:
             - --browsertime
diff --git a/taskcluster/ci/visual-metrics-dep/kind.yml b/taskcluster/ci/visual-metrics-dep/kind.yml
index fa2e8ba85ec9..262ab5fb26ba 100644
--- a/taskcluster/ci/visual-metrics-dep/kind.yml
+++ b/taskcluster/ci/visual-metrics-dep/kind.yml
@@ -39,5 +39,5 @@ job-template:
             - visual-metrics
     run:
         using: run-task
-        command: /builds/worker/bin/run-visual-metrics.py --jobs-json-path /builds/worker/fetches/jobs.json -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
+        command: /builds/worker/bin/run-visual-metrics.py --browsertime-results /builds/worker/fetches/browsertime-results.tgz -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
         checkout: false
diff --git a/taskcluster/docker/visual-metrics/run-visual-metrics.py b/taskcluster/docker/visual-metrics/run-visual-metrics.py
index d531fcc28070..f9ef7d095a5b 100644
--- a/taskcluster/docker/visual-metrics/run-visual-metrics.py
+++ b/taskcluster/docker/visual-metrics/run-visual-metrics.py
@@ -5,20 +5,14 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 """Instrument visualmetrics.py to run in parallel.
-
-Environment variables:
-
-  VISUAL_METRICS_JOBS_JSON:
-    A JSON blob containing the job descriptions.
-
-    Can be overridden with the --jobs-json-path option set to a local file
-    path.
 """
 
 import argparse
 import os
 import json
+import shutil
 import sys
+import tarfile
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from functools import partial
 from multiprocessing import cpu_count
@@ -28,7 +22,7 @@ import attr
 import requests
 import structlog
 import subprocess
-from voluptuous import Required, Schema, Url
+from voluptuous import Required, Schema
 
 #: The workspace directory where files will be downloaded, etc.
 WORKSPACE_DIR = Path("/", "builds", "worker", "workspace")
@@ -48,14 +42,14 @@ class Job:
     #: json_path: The path to the ``browsertime.json`` file on disk.
     json_path = attr.ib(type=Path)
 
-    #: json_url: The URL of the ``browsertime.json`` file.
-    json_url = attr.ib(type=str)
+    #: json_location: The location or URL of the ``browsertime.json`` file.
+    json_location = attr.ib(type=str)
 
     #: video_path: The path of the video file on disk.
     video_path = attr.ib(type=Path)
 
-    #: video_url: The URl of the video file.
-    video_url = attr.ib(type=str)
+    #: video_location: The path or URL of the video file.
+    video_location = attr.ib(type=str)
 
 
 # NB: Keep in sync with try_task_config_schema in
@@ -64,7 +58,7 @@ class Job:
 JOB_SCHEMA = Schema(
     {
         Required("jobs"): [
-            {Required("browsertime_json_url"): Url(), Required("video_url"): Url()}
+            {Required("json_location"): str, Required("video_location"): str}
         ]
     }
 )
@@ -86,8 +80,7 @@ def run_command(log, cmd):
         log.info("Command succeeded", result=res)
         return 0, res
     except subprocess.CalledProcessError as e:
-        log.info("Command failed", cmd=cmd, status=e.returncode,
-                 output=e.output)
+        log.info("Command failed", cmd=cmd, status=e.returncode, output=e.output)
         return e.returncode, e.output
 
 
@@ -109,50 +102,33 @@ def main(log, args):
     visualmetrics_path = Path(fetch_dir) / "visualmetrics.py"
     if not visualmetrics_path.exists():
         log.error(
-            "Could not locate visualmetrics.py",
-            expected_path=str(visualmetrics_path)
+            "Could not locate visualmetrics.py", expected_path=str(visualmetrics_path)
         )
         return 1
 
-    if args.jobs_json_path:
-        try:
-            with open(str(args.jobs_json_path), "r") as f:
-                jobs_json = json.load(f)
-        except Exception as e:
-            log.error(
-                "Could not read jobs.json file: %s" % e,
-                path=args.jobs_json_path,
-                exc_info=True,
-            )
-            return 1
-
-        log.info(
-            "Loaded jobs.json from file", path=args.jobs_json_path, jobs_json=jobs_json
+    results_path = Path(args.browsertime_results).parent
+    try:
+        with tarfile.open(str(args.browsertime_results)) as tar:
+            tar.extractall(path=str(results_path))
+    except Exception:
+        log.error(
+            "Could not read extract browsertime results archive",
+            path=args.browsertime_results,
+            exc_info=True,
         )
+        return 1
+    log.info("Extracted browsertime results", path=args.browsertime_results)
+    jobs_json_path = results_path / "browsertime-results" / "jobs.json"
+    try:
+        with open(str(jobs_json_path), "r") as f:
+            jobs_json = json.load(f)
+    except Exception as e:
+        log.error(
+            "Could not read jobs.json file: %s" % e, path=jobs_json_path, exc_info=True
+        )
+        return 1
 
-    else:
-        raw_jobs_json = os.getenv("VISUAL_METRICS_JOBS_JSON")
-        if raw_jobs_json is not None and isinstance(raw_jobs_json, bytes):
-            raw_jobs_json = raw_jobs_json.decode("utf-8")
-        elif raw_jobs_json is None:
-            log.error(
-                "Expected one of --jobs-json-path or "
-                "VISUAL_METRICS_JOBS_JSON environment variable."
-            )
-            return 1
-
-        try:
-            jobs_json = json.loads(raw_jobs_json)
-        except (TypeError, ValueError) as e:
-            log.error(
-                "Failed to decode VISUAL_METRICS_JOBS_JSON environment "
-                "variable: %s" % e,
-                value=raw_jobs_json,
-            )
-            return 1
-
-        log.info("Parsed jobs.json from environment", jobs_json=jobs_json)
-
+    log.info("Loaded jobs.json from file", path=jobs_json_path, jobs_json=jobs_json)
     try:
         JOB_SCHEMA(jobs_json)
     except Exception as e:
@@ -165,6 +141,8 @@ def main(log, args):
         log.error("Failed to download jobs: %s" % e, exc_info=True)
         return 1
 
+    runs_failed = 0
+
     with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
         for job, result in zip(
             downloaded_jobs,
@@ -180,8 +158,11 @@ def main(log, args):
             returncode, res = result
             if returncode != 0:
                 log.error(
-                    "Failed to run visualmetrics.py", video_url=job.video_url, error=res
+                    "Failed to run visualmetrics.py",
+                    video_location=job.video_location,
+                    error=res,
                 )
+                runs_failed += 1
             else:
                 path = job.job_dir / "visual-metrics.json"
                 with path.open("wb") as f:
@@ -195,28 +176,35 @@ def main(log, args):
             {
                 "successful_jobs": [
                     {
-                        "video_url": job.video_url,
-                        "browsertime_json_url": job.json_url,
+                        "video_location": job.video_location,
+                        "json_location": job.json_location,
                         "path": (str(job.job_dir.relative_to(WORKSPACE_DIR)) + "/"),
                     }
                     for job in downloaded_jobs
                 ],
                 "failed_jobs": [
-                    {"video_url": job.video_url, "browsertime_json_url": job.json_url}
+                    {
+                        "video_location": job.video_location,
+                        "json_location": job.json_location,
+                    }
                     for job in failed_jobs
                 ],
             },
             f,
         )
 
-    tarfile = OUTPUT_DIR / "visual-metrics.tar.xz"
-    log.info("Creating the tarfile", tarfile=tarfile)
+    archive = OUTPUT_DIR / "visual-metrics.tar.xz"
+    log.info("Creating the tarfile", tarfile=archive)
     returncode, res = run_command(
-        log, ["tar", "cJf", str(tarfile), "-C", str(WORKSPACE_DIR), "."]
+        log, ["tar", "cJf", str(archive), "-C", str(WORKSPACE_DIR), "."]
     )
     if returncode != 0:
         raise Exception("Could not tar the results")
 
+    # If there's one failure along the way, we want to return > 0
+    # to trigger a red job in TC.
+    return len(failed_jobs) + runs_failed
+
 
 def download_inputs(log, raw_jobs):
     """Download the inputs for all jobs in parallel.
@@ -234,14 +222,13 @@ def download_inputs(log, raw_jobs):
     for i, job in enumerate(raw_jobs):
         job_dir = WORKSPACE_JOBS_DIR / str(i)
         job_dir.mkdir(parents=True, exist_ok=True)
-
         pending_jobs.append(
             Job(
                 job_dir,
                 job_dir / "browsertime.json",
-                job["browsertime_json_url"],
+                job["json_location"],
                 job_dir / "video",
-                job["video_url"],
+                job["video_location"],
             )
         )
 
@@ -273,14 +260,15 @@ def download_job(log, job):
         attribute is updated to match the file path given by the video file
         in the ``browsertime.json`` file.
     """
-    log = log.bind(json_url=job.json_url)
+    fetch_dir = Path(os.getenv("MOZ_FETCHES_DIR"))
+    log = log.bind(json_location=job.json_location)
     try:
-        download(job.video_url, job.video_path)
-        download(job.json_url, job.json_path)
+        download_or_copy(fetch_dir / job.video_location, job.video_path)
+        download_or_copy(fetch_dir / job.json_location, job.json_path)
     except Exception as e:
         log.error(
             "Failed to download files for job: %s" % e,
-            video_url=job.video_url,
+            video_location=job.video_location,
             exc_info=True,
         )
         return job, False
@@ -309,6 +297,27 @@ def download_job(log, job):
     return job, True
 
 
+def download_or_copy(url_or_location, path):
+    """Download the resource at the given URL or path to the local path.
+
+    Args:
+        url_or_location: The URL or path of the resource to download or copy.
+        path: The local path to download or copy the resource to.
+
+    Raises:
+        OSError:
+            Raised if an IO error occurs while writing the file.
+
+        requests.exceptions.HTTPError:
+            Raised when an HTTP error (including e.g., HTTP 404) occurs.
+    """
+    url_or_location = str(url_or_location)
+    if os.path.exists(url_or_location):
+        shutil.copyfile(url_or_location, str(path))
+        return
+    download(url_or_location, path)
+
+
 def download(url, path):
     """Download the resource at the given URL to the local path.
 
@@ -357,16 +366,15 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
     )
+
     parser.add_argument(
-        "--jobs-json-path",
+        "--browsertime-results",
         type=Path,
         metavar="PATH",
-        help=(
-            "The path to the jobs.json file. If not present, the "
-            "VISUAL_METRICS_JOBS_JSON environment variable will be used "
-            "instead."
-        )
+        help="The path to the browsertime results tarball.",
+        required=True,
     )
+
     parser.add_argument(
         "visual_metrics_options",
         type=str,
diff --git a/taskcluster/taskgraph/decision.py b/taskcluster/taskgraph/decision.py
index 1ffacc29255d..93b9afc3785a 100644
--- a/taskcluster/taskgraph/decision.py
+++ b/taskcluster/taskgraph/decision.py
@@ -27,7 +27,7 @@ from .util.schema import validate_schema, Schema
 from .util.taskcluster import get_artifact
 from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds
 from .util.yaml import load_yaml
-from voluptuous import Required, Optional, Url
+from voluptuous import Required, Optional
 
 
 logger = logging.getLogger(__name__)
@@ -120,8 +120,8 @@ PER_PROJECT_PARAMETERS = {
 visual_metrics_jobs_schema = Schema({
         Required('jobs'): [
             {
-                Required('browsertime_json_url'): Url(),
-                Required('video_url'): Url(),
+                Required('json_location'): str,
+                Required('video_location'): str,
             }
         ]
 })
diff --git a/taskcluster/taskgraph/transforms/visual_metrics_dep.py b/taskcluster/taskgraph/transforms/visual_metrics_dep.py
index a8f1c1e8a8bf..9eef8c4909cb 100644
--- a/taskcluster/taskgraph/transforms/visual_metrics_dep.py
+++ b/taskcluster/taskgraph/transforms/visual_metrics_dep.py
@@ -23,7 +23,7 @@ def run_visual_metrics(config, jobs):
         if dep_job is not None:
             platform = dep_job.task['extra']['treeherder-platform']
             job['dependencies'] = {dep_job.label: dep_job.label}
-            job['fetches'][dep_job.label] = ['/public/test_info/jobs.json']
+            job['fetches'][dep_job.label] = ['/public/test_info/browsertime-results.tgz']
             attributes = dict(dep_job.attributes)
             attributes['platform'] = platform
             job['label'] = LABEL % attributes
diff --git a/taskcluster/taskgraph/util/schema.py b/taskcluster/taskgraph/util/schema.py
index d90705897069..ca01b13a1d75 100644
--- a/taskcluster/taskgraph/util/schema.py
+++ b/taskcluster/taskgraph/util/schema.py
@@ -129,7 +129,7 @@ def resolve_keyed_by(item, field, item_name, **extra_values):
 WHITELISTED_SCHEMA_IDENTIFIERS = [
     # upstream-artifacts are handed directly to scriptWorker, which expects interCaps
     lambda path: "[u'upstream-artifacts']" in path,
-    lambda path: "[u'browsertime_json_url']" in path or "[u'video_url']" in path,
+    lambda path: "[u'json_location']" in path or "[u'video_location']" in path,
 ]
 
 
diff --git a/testing/raptor/raptor/raptor.py b/testing/raptor/raptor/raptor.py
index f8634cea72df..88370d4b9a6e 100644
--- a/testing/raptor/raptor/raptor.py
+++ b/testing/raptor/raptor/raptor.py
@@ -18,6 +18,7 @@ import subprocess
 import sys
 import tempfile
 import time
+import tarfile
 
 import requests
 
@@ -1798,6 +1799,16 @@ def main(args=sys.argv[1:]):
             LOG.critical(" ".join("%s: %s" % (subject, msg) for subject, msg in message))
         os.sys.exit(1)
 
+    # if we're running browsertime in the CI, we want to zip the result dir
+    if args.browsertime and not args.run_local:
+        result_dir = raptor.results_handler.result_dir()
+        if os.path.exists(result_dir):
+            LOG.info("Creating tarball at %s" % result_dir + ".tgz")
+            with tarfile.open(result_dir + ".tgz", "w:gz") as tar:
+                tar.add(result_dir, arcname=os.path.basename(result_dir))
+            LOG.info("Removing %s" % result_dir)
+            shutil.rmtree(result_dir)
+
     # when running raptor locally with gecko profiling on, use the view-gecko-profile
     # tool to automatically load the latest gecko profile in profiler.firefox.com
     if args.gecko_profile and args.run_local:
diff --git a/testing/raptor/raptor/results.py b/testing/raptor/raptor/results.py
index 6983c85c81bb..a179d8575093 100644
--- a/testing/raptor/raptor/results.py
+++ b/testing/raptor/raptor/results.py
@@ -226,6 +226,9 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
         super(BrowsertimeResultsHandler, self).__init__(**config)
         self._root_results_dir = root_results_dir
 
+    def result_dir(self):
+        return self._root_results_dir
+
     def result_dir_for_test(self, test):
         return os.path.join(self._root_results_dir, test['name'])
 
@@ -436,28 +439,28 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
 
         return results
 
-    def _extract_vmetrics_jobs(self, test, browsertime_json, browsertime_results):
-        # XXX will do better later
-        url = ("{root_url}/api/queue/v1/task/{task_id}/runs/0/artifacts/public/"
-               "test_info/".format(
-                   root_url=os.environ.get('TASKCLUSTER_ROOT_URL', 'taskcluster-root-url.invalid'),
-                   task_id=os.environ.get("TASK_ID", "??"),
-               ))
+    def _extract_vmetrics(self, browsertime_json, browsertime_results):
+        # The visual metrics task expects posix paths.
+        def _normalized_join(*args):
+            path = os.path.join(*args)
+            return path.replace(os.path.sep, "/")
 
-        json_url = url + "/".join(browsertime_json.split(os.path.sep)[-3:])
-        files = []
-        for res in browsertime_results:
-            files.extend(res.get("files", {}).get("video", []))
-        if len(files) == 0:
-            # no video files.
-            return None
         name = browsertime_json.split(os.path.sep)[-2]
-        result = []
-        for file in files:
-            video_url = url + "browsertime-results/" + name + "/" + file
-            result.append({"browsertime_json_url": json_url,
-                           "video_url": video_url})
-        return result
+        reldir = _normalized_join("browsertime-results", name)
+
+        def _extract_metrics(res):
+            # extracts the video files in one result and send back the
+            # mapping expected by the visual metrics task
+            vfiles = res.get("files", {}).get("video", [])
+            return [{"json_location": _normalized_join(reldir, "browsertime.json"),
+                     "video_location": _normalized_join(reldir, vfile)}
+                    for vfile in vfiles]
+
+        vmetrics = []
+        for res in browsertime_results:
+            vmetrics.extend(_extract_metrics(res))
+
+        return len(vmetrics) > 0 and vmetrics or None
 
     def summarize_and_output(self, test_config, tests, test_names):
         """
@@ -508,7 +511,7 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
                 raise
 
             if not run_local:
-                video_files = self._extract_vmetrics_jobs(test, bt_res_json, raw_btresults)
+                video_files = self._extract_vmetrics(bt_res_json, raw_btresults)
                 if video_files:
                     video_jobs.extend(video_files)
 
@@ -586,9 +589,10 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
         if not self.gecko_profile:
             validate_success = self._validate_treeherder_data(output, out_perfdata)
 
-        # Dumping the video list for the visual metrics task.
+        # Dumping the video list for the visual metrics task at the root of
+        # the browsertime results dir.
         if len(video_jobs) > 0:
-            jobs_file = os.path.join(test_config["artifact_dir"], "jobs.json")
+            jobs_file = os.path.join(self.result_dir(), "jobs.json")
             LOG.info("Writing %d video jobs into %s" % (len(video_jobs), jobs_file))
             with open(jobs_file, "w") as f:
                 f.write(json.dumps({"jobs": video_jobs}))