Bug 1595789 - zip browsertime results r=rwood,sparky,barret

Instead of using a JSON file that contains URLs to Taskcluster artifacts, let's use a single tarball that contains all the video files to analyze. This will reduce the number of artifacts we generate per Raptor run and the number of network downloads.

The run-visual-metrics task gets the tarball directly when it's used as a sub-task, and produces its own results tarball.

Note that at this point I don't see any reason to copy all the files into the vismet results tarball.

Maybe we should simply generate a *single* JSON file that contains a merge of all visualmetrics results?
But that could be done later. Let me know.
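
For reference, here's a sketch of the jobs.json payload that now rides inside browsertime-results.tgz instead of being published as its own artifact. The test name and video filename are made up for illustration; the keys and the tarball-relative POSIX paths match the new JOB_SCHEMA and _extract_vmetrics() below:

    # Hypothetical payload; "amazon" and "1.mp4" are illustrative names.
    jobs_json = {
        "jobs": [
            {
                "json_location": "browsertime-results/amazon/browsertime.json",
                "video_location": "browsertime-results/amazon/1.mp4",
            }
        ]
    }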

Differential Revision: https://phabricator.services.mozilla.com/D52907

--HG--
extra : moz-landing-system : lando
Tarek Ziadé 2019-11-26 18:19:23 +00:00
Parent 4c59d38eed
Commit 9705cbd3c7
8 changed files with 128 additions and 105 deletions

View file

@@ -1041,7 +1041,7 @@ browsertime-tp6-1:
     run-visual-metrics:
         by-app:
             chrome: false
-            default: false
+            default: true
     mozharness:
         extra-options:
             - --browsertime

View file

@@ -39,5 +39,5 @@ job-template:
             - visual-metrics
     run:
         using: run-task
-        command: /builds/worker/bin/run-visual-metrics.py --jobs-json-path /builds/worker/fetches/jobs.json -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
+        command: /builds/worker/bin/run-visual-metrics.py --browsertime-results /builds/worker/fetches/browsertime-results.tgz -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
         checkout: false

View file

@@ -5,20 +5,14 @@
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 """Instrument visualmetrics.py to run in parallel.
-
-Environment variables:
-
-    VISUAL_METRICS_JOBS_JSON:
-        A JSON blob containing the job descriptions.
-
-        Can be overridden with the --jobs-json-path option set to a local file
-        path.
 """
 
 import argparse
 import os
 import json
+import shutil
 import sys
+import tarfile
 
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from functools import partial
 from multiprocessing import cpu_count
@@ -28,7 +22,7 @@ import attr
 import requests
 import structlog
 import subprocess
-from voluptuous import Required, Schema, Url
+from voluptuous import Required, Schema
 
 #: The workspace directory where files will be downloaded, etc.
 WORKSPACE_DIR = Path("/", "builds", "worker", "workspace")
@@ -48,14 +42,14 @@ class Job:
     #: json_path: The path to the ``browsertime.json`` file on disk.
     json_path = attr.ib(type=Path)
 
-    #: json_url: The URL of the ``browsertime.json`` file.
-    json_url = attr.ib(type=str)
+    #: json_location: The location or URL of the ``browsertime.json`` file.
+    json_location = attr.ib(type=str)
 
     #: video_path: The path of the video file on disk.
     video_path = attr.ib(type=Path)
 
-    #: video_url: The URl of the video file.
-    video_url = attr.ib(type=str)
+    #: video_location: The path or URL of the video file.
+    video_location = attr.ib(type=str)
 
 
 # NB: Keep in sync with try_task_config_schema in
@@ -64,7 +58,7 @@ class Job:
 JOB_SCHEMA = Schema(
     {
         Required("jobs"): [
-            {Required("browsertime_json_url"): Url(), Required("video_url"): Url()}
+            {Required("json_location"): str, Required("video_location"): str}
         ]
     }
 )
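
A quick illustration of why the schema fields go from Url() to plain str: as far as I can tell, voluptuous's Url() validator requires an absolute URL with a scheme and host, so the tarball-relative paths that raptor now writes would be rejected. A minimal sketch, with an illustrative path:

    from voluptuous import MultipleInvalid, Required, Schema, Url

    old_schema = Schema({Required("video_url"): Url()})
    new_schema = Schema({Required("video_location"): str})

    try:
        old_schema({"video_url": "browsertime-results/amazon/1.mp4"})
    except MultipleInvalid:
        pass  # a relative path has no scheme/host, so Url() rejects it

    # A plain str accepts both URLs and tarball-relative paths.
    new_schema({"video_location": "browsertime-results/amazon/1.mp4"})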
@@ -86,8 +80,7 @@ def run_command(log, cmd):
         log.info("Command succeeded", result=res)
         return 0, res
     except subprocess.CalledProcessError as e:
-        log.info("Command failed", cmd=cmd, status=e.returncode,
-                 output=e.output)
+        log.info("Command failed", cmd=cmd, status=e.returncode, output=e.output)
         return e.returncode, e.output
@@ -109,50 +102,33 @@ def main(log, args):
     visualmetrics_path = Path(fetch_dir) / "visualmetrics.py"
     if not visualmetrics_path.exists():
         log.error(
-            "Could not locate visualmetrics.py",
-            expected_path=str(visualmetrics_path)
+            "Could not locate visualmetrics.py", expected_path=str(visualmetrics_path)
         )
         return 1
 
-    if args.jobs_json_path:
-        try:
-            with open(str(args.jobs_json_path), "r") as f:
-                jobs_json = json.load(f)
-        except Exception as e:
-            log.error(
-                "Could not read jobs.json file: %s" % e,
-                path=args.jobs_json_path,
-                exc_info=True,
-            )
-            return 1
-
-        log.info(
-            "Loaded jobs.json from file", path=args.jobs_json_path, jobs_json=jobs_json
-        )
-    else:
-        raw_jobs_json = os.getenv("VISUAL_METRICS_JOBS_JSON")
-        if raw_jobs_json is not None and isinstance(raw_jobs_json, bytes):
-            raw_jobs_json = raw_jobs_json.decode("utf-8")
-        elif raw_jobs_json is None:
-            log.error(
-                "Expected one of --jobs-json-path or "
-                "VISUAL_METRICS_JOBS_JSON environment variable."
-            )
-            return 1
-
-        try:
-            jobs_json = json.loads(raw_jobs_json)
-        except (TypeError, ValueError) as e:
-            log.error(
-                "Failed to decode VISUAL_METRICS_JOBS_JSON environment "
-                "variable: %s" % e,
-                value=raw_jobs_json,
-            )
-            return 1
-
-        log.info("Parsed jobs.json from environment", jobs_json=jobs_json)
+    results_path = Path(args.browsertime_results).parent
+    try:
+        with tarfile.open(str(args.browsertime_results)) as tar:
+            tar.extractall(path=str(results_path))
+    except Exception:
+        log.error(
+            "Could not extract browsertime results archive",
+            path=args.browsertime_results,
+            exc_info=True,
+        )
+        return 1
+
+    log.info("Extracted browsertime results", path=args.browsertime_results)
+    jobs_json_path = results_path / "browsertime-results" / "jobs.json"
+
+    try:
+        with open(str(jobs_json_path), "r") as f:
+            jobs_json = json.load(f)
+    except Exception as e:
+        log.error(
+            "Could not read jobs.json file: %s" % e, path=jobs_json_path, exc_info=True
+        )
+        return 1
+
+    log.info("Loaded jobs.json from file", path=jobs_json_path, jobs_json=jobs_json)
 
     try:
         JOB_SCHEMA(jobs_json)
     except Exception as e:
@@ -165,6 +141,8 @@ def main(log, args):
         log.error("Failed to download jobs: %s" % e, exc_info=True)
         return 1
 
+    runs_failed = 0
+
     with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
         for job, result in zip(
             downloaded_jobs,
@@ -180,8 +158,11 @@ def main(log, args):
             returncode, res = result
             if returncode != 0:
                 log.error(
-                    "Failed to run visualmetrics.py", video_url=job.video_url, error=res
+                    "Failed to run visualmetrics.py",
+                    video_location=job.video_location,
+                    error=res,
                 )
+                runs_failed += 1
             else:
                 path = job.job_dir / "visual-metrics.json"
                 with path.open("wb") as f:
@@ -195,28 +176,35 @@ def main(log, args):
                 {
                     "successful_jobs": [
                         {
-                            "video_url": job.video_url,
-                            "browsertime_json_url": job.json_url,
+                            "video_location": job.video_location,
+                            "json_location": job.json_location,
                             "path": (str(job.job_dir.relative_to(WORKSPACE_DIR)) + "/"),
                         }
                         for job in downloaded_jobs
                     ],
                     "failed_jobs": [
-                        {"video_url": job.video_url, "browsertime_json_url": job.json_url}
+                        {
+                            "video_location": job.video_location,
+                            "json_location": job.json_location,
+                        }
                         for job in failed_jobs
                     ],
                 },
                 f,
             )
 
-    tarfile = OUTPUT_DIR / "visual-metrics.tar.xz"
-    log.info("Creating the tarfile", tarfile=tarfile)
+    archive = OUTPUT_DIR / "visual-metrics.tar.xz"
+    log.info("Creating the tarfile", tarfile=archive)
     returncode, res = run_command(
-        log, ["tar", "cJf", str(tarfile), "-C", str(WORKSPACE_DIR), "."]
+        log, ["tar", "cJf", str(archive), "-C", str(WORKSPACE_DIR), "."]
     )
     if returncode != 0:
         raise Exception("Could not tar the results")
 
+    # If there's one failure along the way, we want to return > 0
+    # to trigger a red job in TC.
+    return len(failed_jobs) + runs_failed
+
 
 def download_inputs(log, raw_jobs):
     """Download the inputs for all jobs in parallel.
@@ -234,14 +222,13 @@ def download_inputs(log, raw_jobs):
     for i, job in enumerate(raw_jobs):
         job_dir = WORKSPACE_JOBS_DIR / str(i)
         job_dir.mkdir(parents=True, exist_ok=True)
-
         pending_jobs.append(
             Job(
                 job_dir,
                 job_dir / "browsertime.json",
-                job["browsertime_json_url"],
+                job["json_location"],
                 job_dir / "video",
-                job["video_url"],
+                job["video_location"],
             )
         )
@@ -273,14 +260,15 @@ def download_job(log, job):
     attribute is updated to match the file path given by the video file
     in the ``browsertime.json`` file.
     """
-    log = log.bind(json_url=job.json_url)
+    fetch_dir = Path(os.getenv("MOZ_FETCHES_DIR"))
+    log = log.bind(json_location=job.json_location)
     try:
-        download(job.video_url, job.video_path)
-        download(job.json_url, job.json_path)
+        download_or_copy(fetch_dir / job.video_location, job.video_path)
+        download_or_copy(fetch_dir / job.json_location, job.json_path)
     except Exception as e:
         log.error(
             "Failed to download files for job: %s" % e,
-            video_url=job.video_url,
+            video_location=job.video_location,
             exc_info=True,
         )
         return job, False
@@ -309,6 +297,27 @@ def download_job(log, job):
 
     return job, True
 
 
+def download_or_copy(url_or_location, path):
+    """Download the resource at the given URL or path to the local path.
+
+    Args:
+        url_or_location: The URL or path of the resource to download or copy.
+        path: The local path to download or copy the resource to.
+
+    Raises:
+        OSError:
+            Raised if an IO error occurs while writing the file.
+
+        requests.exceptions.HTTPError:
+            Raised when an HTTP error (including e.g., HTTP 404) occurs.
+    """
+    url_or_location = str(url_or_location)
+    if os.path.exists(url_or_location):
+        shutil.copyfile(url_or_location, str(path))
+        return
+
+    download(url_or_location, path)
+
+
 def download(url, path):
     """Download the resource at the given URL to the local path.
@@ -357,16 +366,15 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(
         description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
     )
     parser.add_argument(
-        "--jobs-json-path",
+        "--browsertime-results",
         type=Path,
         metavar="PATH",
-        help=(
-            "The path to the jobs.json file. If not present, the "
-            "VISUAL_METRICS_JOBS_JSON environment variable will be used "
-            "instead."
-        )
+        help="The path to the browsertime results tarball.",
+        required=True,
     )
     parser.add_argument(
         "visual_metrics_options",
         type=str,

View file

@@ -27,7 +27,7 @@ from .util.schema import validate_schema, Schema
 from .util.taskcluster import get_artifact
 from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds
 from .util.yaml import load_yaml
-from voluptuous import Required, Optional, Url
+from voluptuous import Required, Optional
 
 logger = logging.getLogger(__name__)

@@ -120,8 +120,8 @@ PER_PROJECT_PARAMETERS = {
 visual_metrics_jobs_schema = Schema({
     Required('jobs'): [
         {
-            Required('browsertime_json_url'): Url(),
-            Required('video_url'): Url(),
+            Required('json_location'): str,
+            Required('video_location'): str,
         }
     ]
 })

View file

@@ -23,7 +23,7 @@ def run_visual_metrics(config, jobs):
         if dep_job is not None:
             platform = dep_job.task['extra']['treeherder-platform']
             job['dependencies'] = {dep_job.label: dep_job.label}
-            job['fetches'][dep_job.label] = ['/public/test_info/jobs.json']
+            job['fetches'][dep_job.label] = ['/public/test_info/browsertime-results.tgz']
             attributes = dict(dep_job.attributes)
             attributes['platform'] = platform
             job['label'] = LABEL % attributes
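
The fetch entry above is what makes the tarball show up for the vismet task: run-task downloads each listed upstream artifact into $MOZ_FETCHES_DIR, which is why the kind's command can point at /builds/worker/fetches/browsertime-results.tgz. Sketched as the resulting task-definition fragment (the dependency label is illustrative):

    # Hypothetical fragment of a generated run-visual-metrics task.
    job["fetches"] = {
        "browsertime-tp6-1-firefox": ["/public/test_info/browsertime-results.tgz"],
    }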

View file

@@ -129,7 +129,7 @@ def resolve_keyed_by(item, field, item_name, **extra_values):
 WHITELISTED_SCHEMA_IDENTIFIERS = [
     # upstream-artifacts are handed directly to scriptWorker, which expects interCaps
     lambda path: "[u'upstream-artifacts']" in path,
-    lambda path: "[u'browsertime_json_url']" in path or "[u'video_url']" in path,
+    lambda path: "[u'json_location']" in path or "[u'video_location']" in path,
 ]

View file

@@ -18,6 +18,7 @@ import subprocess
 import sys
 import tempfile
 import time
+import tarfile
 
 import requests
@@ -1798,6 +1799,16 @@ def main(args=sys.argv[1:]):
         LOG.critical(" ".join("%s: %s" % (subject, msg) for subject, msg in message))
         os.sys.exit(1)
 
+    # if we're running browsertime in the CI, we want to zip the result dir
+    if args.browsertime and not args.run_local:
+        result_dir = raptor.results_handler.result_dir()
+        if os.path.exists(result_dir):
+            LOG.info("Creating tarball at %s" % result_dir + ".tgz")
+            with tarfile.open(result_dir + ".tgz", "w:gz") as tar:
+                tar.add(result_dir, arcname=os.path.basename(result_dir))
+            LOG.info("Removing %s" % result_dir)
+            shutil.rmtree(result_dir)
+
     # when running raptor locally with gecko profiling on, use the view-gecko-profile
     # tool to automatically load the latest gecko profile in profiler.firefox.com
     if args.gecko_profile and args.run_local:
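
For clarity, the packaging step above boils down to this standalone sketch (pack_results is a hypothetical name; the patch inlines the logic in main()):

    import os
    import shutil
    import tarfile

    def pack_results(result_dir):
        # Tar up the whole results dir; arcname keeps a single top-level
        # "browsertime-results" directory in the archive, which is where
        # run-visual-metrics.py expects to find jobs.json after extraction.
        archive = result_dir + ".tgz"
        with tarfile.open(archive, "w:gz") as tar:
            tar.add(result_dir, arcname=os.path.basename(result_dir))
        shutil.rmtree(result_dir)
        return archive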

View file

@@ -226,6 +226,9 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
         super(BrowsertimeResultsHandler, self).__init__(**config)
         self._root_results_dir = root_results_dir
 
+    def result_dir(self):
+        return self._root_results_dir
+
     def result_dir_for_test(self, test):
         return os.path.join(self._root_results_dir, test['name'])
@@ -436,28 +439,28 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
         return results
 
-    def _extract_vmetrics_jobs(self, test, browsertime_json, browsertime_results):
-        # XXX will do better later
-        url = ("{root_url}/api/queue/v1/task/{task_id}/runs/0/artifacts/public/"
-               "test_info/".format(
-                   root_url=os.environ.get('TASKCLUSTER_ROOT_URL', 'taskcluster-root-url.invalid'),
-                   task_id=os.environ.get("TASK_ID", "??"),
-               ))
-        json_url = url + "/".join(browsertime_json.split(os.path.sep)[-3:])
-        files = []
-        for res in browsertime_results:
-            files.extend(res.get("files", {}).get("video", []))
-        if len(files) == 0:
-            # no video files.
-            return None
-        name = browsertime_json.split(os.path.sep)[-2]
-        result = []
-        for file in files:
-            video_url = url + "browsertime-results/" + name + "/" + file
-            result.append({"browsertime_json_url": json_url,
-                           "video_url": video_url})
-        return result
+    def _extract_vmetrics(self, browsertime_json, browsertime_results):
+        # The visual metrics task expects posix paths.
+        def _normalized_join(*args):
+            path = os.path.join(*args)
+            return path.replace(os.path.sep, "/")
+
+        name = browsertime_json.split(os.path.sep)[-2]
+        reldir = _normalized_join("browsertime-results", name)
+
+        def _extract_metrics(res):
+            # extracts the video files in one result and sends back the
+            # mapping expected by the visual metrics task
+            vfiles = res.get("files", {}).get("video", [])
+            return [{"json_location": _normalized_join(reldir, "browsertime.json"),
+                     "video_location": _normalized_join(reldir, vfile)}
+                    for vfile in vfiles]
+
+        vmetrics = []
+        for res in browsertime_results:
+            vmetrics.extend(_extract_metrics(res))
+        return len(vmetrics) > 0 and vmetrics or None
 
     def summarize_and_output(self, test_config, tests, test_names):
         """
@@ -508,7 +511,7 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
                 raise
 
             if not run_local:
-                video_files = self._extract_vmetrics_jobs(test, bt_res_json, raw_btresults)
+                video_files = self._extract_vmetrics(bt_res_json, raw_btresults)
                 if video_files:
                     video_jobs.extend(video_files)
@@ -586,9 +589,10 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
             if not self.gecko_profile:
                 validate_success = self._validate_treeherder_data(output, out_perfdata)
 
-        # Dumping the video list for the visual metrics task.
+        # Dumping the video list for the visual metrics task at the root of
+        # the browsertime results dir.
         if len(video_jobs) > 0:
-            jobs_file = os.path.join(test_config["artifact_dir"], "jobs.json")
+            jobs_file = os.path.join(self.result_dir(), "jobs.json")
             LOG.info("Writing %d video jobs into %s" % (len(video_jobs), jobs_file))
             with open(jobs_file, "w") as f:
                 f.write(json.dumps({"jobs": video_jobs}))