Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1595789 - zip browsertime results r=rwood,sparky,barret
Instead of using a json file that contains URLs to taskcluster artifacts, let's use a single zip file that contains all video files to analyze. This will reduce the number of artifacts we generate per raptor run and the number of network downloads. The run-visual-metrics task gets the tarball directly when it's used as a sub-task and produces its own result tarball. Note that at this point I don't see any reason to copy all the files into the vismet results tarball. Maybe we should simply generate a *single* json file that contains a merge of all visualmetrics results? But that could be done later. Let me know. Differential Revision: https://phabricator.services.mozilla.com/D52907 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Parent
4c59d38eed
Commit
9705cbd3c7
|
@ -1041,7 +1041,7 @@ browsertime-tp6-1:
|
|||
run-visual-metrics:
|
||||
by-app:
|
||||
chrome: false
|
||||
default: false
|
||||
default: true
|
||||
mozharness:
|
||||
extra-options:
|
||||
- --browsertime
|
||||
|
|
|
@ -39,5 +39,5 @@ job-template:
|
|||
- visual-metrics
|
||||
run:
|
||||
using: run-task
|
||||
command: /builds/worker/bin/run-visual-metrics.py --jobs-json-path /builds/worker/fetches/jobs.json -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
|
||||
command: /builds/worker/bin/run-visual-metrics.py --browsertime-results /builds/worker/fetches/browsertime-results.tgz -- --orange --perceptual --contentful --force --renderignore 5 --json --viewport
|
||||
checkout: false
|
||||
|
|
|
@ -5,20 +5,14 @@
|
|||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
"""Instrument visualmetrics.py to run in parallel.
|
||||
|
||||
Environment variables:
|
||||
|
||||
VISUAL_METRICS_JOBS_JSON:
|
||||
A JSON blob containing the job descriptions.
|
||||
|
||||
Can be overridden with the --jobs-json-path option set to a local file
|
||||
path.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
import shutil
|
||||
import sys
|
||||
import tarfile
|
||||
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
|
||||
from functools import partial
|
||||
from multiprocessing import cpu_count
|
||||
|
@ -28,7 +22,7 @@ import attr
|
|||
import requests
|
||||
import structlog
|
||||
import subprocess
|
||||
from voluptuous import Required, Schema, Url
|
||||
from voluptuous import Required, Schema
|
||||
|
||||
#: The workspace directory where files will be downloaded, etc.
|
||||
WORKSPACE_DIR = Path("/", "builds", "worker", "workspace")
|
||||
|
@ -48,14 +42,14 @@ class Job:
|
|||
#: json_path: The path to the ``browsertime.json`` file on disk.
|
||||
json_path = attr.ib(type=Path)
|
||||
|
||||
#: json_url: The URL of the ``browsertime.json`` file.
|
||||
json_url = attr.ib(type=str)
|
||||
#: json_location: The location or URL of the ``browsertime.json`` file.
|
||||
json_location = attr.ib(type=str)
|
||||
|
||||
#: video_path: The path of the video file on disk.
|
||||
video_path = attr.ib(type=Path)
|
||||
|
||||
#: video_url: The URl of the video file.
|
||||
video_url = attr.ib(type=str)
|
||||
#: video_location: The path or URL of the video file.
|
||||
video_location = attr.ib(type=str)
|
||||
|
||||
|
||||
# NB: Keep in sync with try_task_config_schema in
|
||||
|
@ -64,7 +58,7 @@ class Job:
|
|||
JOB_SCHEMA = Schema(
|
||||
{
|
||||
Required("jobs"): [
|
||||
{Required("browsertime_json_url"): Url(), Required("video_url"): Url()}
|
||||
{Required("json_location"): str, Required("video_location"): str}
|
||||
]
|
||||
}
|
||||
)
|
||||
|
@ -86,8 +80,7 @@ def run_command(log, cmd):
|
|||
log.info("Command succeeded", result=res)
|
||||
return 0, res
|
||||
except subprocess.CalledProcessError as e:
|
||||
log.info("Command failed", cmd=cmd, status=e.returncode,
|
||||
output=e.output)
|
||||
log.info("Command failed", cmd=cmd, status=e.returncode, output=e.output)
|
||||
return e.returncode, e.output
|
||||
|
||||
|
||||
|
@ -109,50 +102,33 @@ def main(log, args):
|
|||
visualmetrics_path = Path(fetch_dir) / "visualmetrics.py"
|
||||
if not visualmetrics_path.exists():
|
||||
log.error(
|
||||
"Could not locate visualmetrics.py",
|
||||
expected_path=str(visualmetrics_path)
|
||||
"Could not locate visualmetrics.py", expected_path=str(visualmetrics_path)
|
||||
)
|
||||
return 1
|
||||
|
||||
if args.jobs_json_path:
|
||||
results_path = Path(args.browsertime_results).parent
|
||||
try:
|
||||
with open(str(args.jobs_json_path), "r") as f:
|
||||
jobs_json = json.load(f)
|
||||
except Exception as e:
|
||||
with tarfile.open(str(args.browsertime_results)) as tar:
|
||||
tar.extractall(path=str(results_path))
|
||||
except Exception:
|
||||
log.error(
|
||||
"Could not read jobs.json file: %s" % e,
|
||||
path=args.jobs_json_path,
|
||||
"Could not read extract browsertime results archive",
|
||||
path=args.browsertime_results,
|
||||
exc_info=True,
|
||||
)
|
||||
return 1
|
||||
|
||||
log.info(
|
||||
"Loaded jobs.json from file", path=args.jobs_json_path, jobs_json=jobs_json
|
||||
)
|
||||
|
||||
else:
|
||||
raw_jobs_json = os.getenv("VISUAL_METRICS_JOBS_JSON")
|
||||
if raw_jobs_json is not None and isinstance(raw_jobs_json, bytes):
|
||||
raw_jobs_json = raw_jobs_json.decode("utf-8")
|
||||
elif raw_jobs_json is None:
|
||||
log.error(
|
||||
"Expected one of --jobs-json-path or "
|
||||
"VISUAL_METRICS_JOBS_JSON environment variable."
|
||||
)
|
||||
return 1
|
||||
|
||||
log.info("Extracted browsertime results", path=args.browsertime_results)
|
||||
jobs_json_path = results_path / "browsertime-results" / "jobs.json"
|
||||
try:
|
||||
jobs_json = json.loads(raw_jobs_json)
|
||||
except (TypeError, ValueError) as e:
|
||||
with open(str(jobs_json_path), "r") as f:
|
||||
jobs_json = json.load(f)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
"Failed to decode VISUAL_METRICS_JOBS_JSON environment "
|
||||
"variable: %s" % e,
|
||||
value=raw_jobs_json,
|
||||
"Could not read jobs.json file: %s" % e, path=jobs_json_path, exc_info=True
|
||||
)
|
||||
return 1
|
||||
|
||||
log.info("Parsed jobs.json from environment", jobs_json=jobs_json)
|
||||
|
||||
log.info("Loaded jobs.json from file", path=jobs_json_path, jobs_json=jobs_json)
|
||||
try:
|
||||
JOB_SCHEMA(jobs_json)
|
||||
except Exception as e:
|
||||
|
@ -165,6 +141,8 @@ def main(log, args):
|
|||
log.error("Failed to download jobs: %s" % e, exc_info=True)
|
||||
return 1
|
||||
|
||||
runs_failed = 0
|
||||
|
||||
with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
|
||||
for job, result in zip(
|
||||
downloaded_jobs,
|
||||
|
@ -180,8 +158,11 @@ def main(log, args):
|
|||
returncode, res = result
|
||||
if returncode != 0:
|
||||
log.error(
|
||||
"Failed to run visualmetrics.py", video_url=job.video_url, error=res
|
||||
"Failed to run visualmetrics.py",
|
||||
video_location=job.video_location,
|
||||
error=res,
|
||||
)
|
||||
runs_failed += 1
|
||||
else:
|
||||
path = job.job_dir / "visual-metrics.json"
|
||||
with path.open("wb") as f:
|
||||
|
@ -195,28 +176,35 @@ def main(log, args):
|
|||
{
|
||||
"successful_jobs": [
|
||||
{
|
||||
"video_url": job.video_url,
|
||||
"browsertime_json_url": job.json_url,
|
||||
"video_location": job.video_location,
|
||||
"json_location": job.json_location,
|
||||
"path": (str(job.job_dir.relative_to(WORKSPACE_DIR)) + "/"),
|
||||
}
|
||||
for job in downloaded_jobs
|
||||
],
|
||||
"failed_jobs": [
|
||||
{"video_url": job.video_url, "browsertime_json_url": job.json_url}
|
||||
{
|
||||
"video_location": job.video_location,
|
||||
"json_location": job.json_location,
|
||||
}
|
||||
for job in failed_jobs
|
||||
],
|
||||
},
|
||||
f,
|
||||
)
|
||||
|
||||
tarfile = OUTPUT_DIR / "visual-metrics.tar.xz"
|
||||
log.info("Creating the tarfile", tarfile=tarfile)
|
||||
archive = OUTPUT_DIR / "visual-metrics.tar.xz"
|
||||
log.info("Creating the tarfile", tarfile=archive)
|
||||
returncode, res = run_command(
|
||||
log, ["tar", "cJf", str(tarfile), "-C", str(WORKSPACE_DIR), "."]
|
||||
log, ["tar", "cJf", str(archive), "-C", str(WORKSPACE_DIR), "."]
|
||||
)
|
||||
if returncode != 0:
|
||||
raise Exception("Could not tar the results")
|
||||
|
||||
# If there's one failure along the way, we want to return > 0
|
||||
# to trigger a red job in TC.
|
||||
return len(failed_jobs) + runs_failed
|
||||
|
||||
|
||||
def download_inputs(log, raw_jobs):
|
||||
"""Download the inputs for all jobs in parallel.
|
||||
|
@ -234,14 +222,13 @@ def download_inputs(log, raw_jobs):
|
|||
for i, job in enumerate(raw_jobs):
|
||||
job_dir = WORKSPACE_JOBS_DIR / str(i)
|
||||
job_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
pending_jobs.append(
|
||||
Job(
|
||||
job_dir,
|
||||
job_dir / "browsertime.json",
|
||||
job["browsertime_json_url"],
|
||||
job["json_location"],
|
||||
job_dir / "video",
|
||||
job["video_url"],
|
||||
job["video_location"],
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -273,14 +260,15 @@ def download_job(log, job):
|
|||
attribute is updated to match the file path given by the video file
|
||||
in the ``browsertime.json`` file.
|
||||
"""
|
||||
log = log.bind(json_url=job.json_url)
|
||||
fetch_dir = Path(os.getenv("MOZ_FETCHES_DIR"))
|
||||
log = log.bind(json_location=job.json_location)
|
||||
try:
|
||||
download(job.video_url, job.video_path)
|
||||
download(job.json_url, job.json_path)
|
||||
download_or_copy(fetch_dir / job.video_location, job.video_path)
|
||||
download_or_copy(fetch_dir / job.json_location, job.json_path)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
"Failed to download files for job: %s" % e,
|
||||
video_url=job.video_url,
|
||||
video_location=job.video_location,
|
||||
exc_info=True,
|
||||
)
|
||||
return job, False
|
||||
|
@ -309,6 +297,27 @@ def download_job(log, job):
|
|||
return job, True
|
||||
|
||||
|
||||
def download_or_copy(url_or_location, path):
    """Materialize a resource at the given local path.

    If ``url_or_location`` names an existing local file it is copied;
    otherwise it is treated as a URL and downloaded.

    Args:
        url_or_location: The URL or path of the resource to download or copy.
        path: The local path to download or copy the resource to.

    Raises:
        OSError:
            Raised if an IO error occurs while writing the file.

        requests.exceptions.HTTPError:
            Raised when an HTTP error (including e.g., HTTP 404) occurs.
    """
    source = str(url_or_location)
    if not os.path.exists(source):
        # Not on disk: fall back to fetching it over HTTP.
        download(source, path)
    else:
        shutil.copyfile(source, str(path))
|
||||
|
||||
|
||||
def download(url, path):
|
||||
"""Download the resource at the given URL to the local path.
|
||||
|
||||
|
@ -357,16 +366,15 @@ if __name__ == "__main__":
|
|||
parser = argparse.ArgumentParser(
|
||||
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--jobs-json-path",
|
||||
"--browsertime-results",
|
||||
type=Path,
|
||||
metavar="PATH",
|
||||
help=(
|
||||
"The path to the jobs.json file. If not present, the "
|
||||
"VISUAL_METRICS_JOBS_JSON environment variable will be used "
|
||||
"instead."
|
||||
)
|
||||
help="The path to the browsertime results tarball.",
|
||||
required=True,
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"visual_metrics_options",
|
||||
type=str,
|
||||
|
|
|
@ -27,7 +27,7 @@ from .util.schema import validate_schema, Schema
|
|||
from .util.taskcluster import get_artifact
|
||||
from .util.taskgraph import find_decision_task, find_existing_tasks_from_previous_kinds
|
||||
from .util.yaml import load_yaml
|
||||
from voluptuous import Required, Optional, Url
|
||||
from voluptuous import Required, Optional
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -120,8 +120,8 @@ PER_PROJECT_PARAMETERS = {
|
|||
visual_metrics_jobs_schema = Schema({
|
||||
Required('jobs'): [
|
||||
{
|
||||
Required('browsertime_json_url'): Url(),
|
||||
Required('video_url'): Url(),
|
||||
Required('json_location'): str,
|
||||
Required('video_location'): str,
|
||||
}
|
||||
]
|
||||
})
|
||||
|
|
|
@ -23,7 +23,7 @@ def run_visual_metrics(config, jobs):
|
|||
if dep_job is not None:
|
||||
platform = dep_job.task['extra']['treeherder-platform']
|
||||
job['dependencies'] = {dep_job.label: dep_job.label}
|
||||
job['fetches'][dep_job.label] = ['/public/test_info/jobs.json']
|
||||
job['fetches'][dep_job.label] = ['/public/test_info/browsertime-results.tgz']
|
||||
attributes = dict(dep_job.attributes)
|
||||
attributes['platform'] = platform
|
||||
job['label'] = LABEL % attributes
|
||||
|
|
|
@ -129,7 +129,7 @@ def resolve_keyed_by(item, field, item_name, **extra_values):
|
|||
WHITELISTED_SCHEMA_IDENTIFIERS = [
|
||||
# upstream-artifacts are handed directly to scriptWorker, which expects interCaps
|
||||
lambda path: "[u'upstream-artifacts']" in path,
|
||||
lambda path: "[u'browsertime_json_url']" in path or "[u'video_url']" in path,
|
||||
lambda path: "[u'json_location']" in path or "[u'video_location']" in path,
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ import subprocess
|
|||
import sys
|
||||
import tempfile
|
||||
import time
|
||||
import tarfile
|
||||
|
||||
import requests
|
||||
|
||||
|
@ -1798,6 +1799,16 @@ def main(args=sys.argv[1:]):
|
|||
LOG.critical(" ".join("%s: %s" % (subject, msg) for subject, msg in message))
|
||||
os.sys.exit(1)
|
||||
|
||||
# if we're running browsertime in the CI, we want to zip the result dir
|
||||
if args.browsertime and not args.run_local:
|
||||
result_dir = raptor.results_handler.result_dir()
|
||||
if os.path.exists(result_dir):
|
||||
LOG.info("Creating tarball at %s" % result_dir + ".tgz")
|
||||
with tarfile.open(result_dir + ".tgz", "w:gz") as tar:
|
||||
tar.add(result_dir, arcname=os.path.basename(result_dir))
|
||||
LOG.info("Removing %s" % result_dir)
|
||||
shutil.rmtree(result_dir)
|
||||
|
||||
# when running raptor locally with gecko profiling on, use the view-gecko-profile
|
||||
# tool to automatically load the latest gecko profile in profiler.firefox.com
|
||||
if args.gecko_profile and args.run_local:
|
||||
|
|
|
@ -226,6 +226,9 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
|
|||
super(BrowsertimeResultsHandler, self).__init__(**config)
|
||||
self._root_results_dir = root_results_dir
|
||||
|
||||
def result_dir(self):
    """Return the root directory that holds all browsertime results."""
    return self._root_results_dir
|
||||
|
||||
def result_dir_for_test(self, test):
    """Return the per-test results directory (root dir + the test's name)."""
    return os.path.join(self._root_results_dir, test['name'])
|
||||
|
||||
|
@ -436,28 +439,28 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
|
|||
|
||||
return results
|
||||
|
||||
def _extract_vmetrics_jobs(self, test, browsertime_json, browsertime_results):
|
||||
# XXX will do better later
|
||||
url = ("{root_url}/api/queue/v1/task/{task_id}/runs/0/artifacts/public/"
|
||||
"test_info/".format(
|
||||
root_url=os.environ.get('TASKCLUSTER_ROOT_URL', 'taskcluster-root-url.invalid'),
|
||||
task_id=os.environ.get("TASK_ID", "??"),
|
||||
))
|
||||
def _extract_vmetrics(self, browsertime_json, browsertime_results):
|
||||
# The visual metrics task expects posix paths.
|
||||
def _normalized_join(*args):
|
||||
path = os.path.join(*args)
|
||||
return path.replace(os.path.sep, "/")
|
||||
|
||||
json_url = url + "/".join(browsertime_json.split(os.path.sep)[-3:])
|
||||
files = []
|
||||
for res in browsertime_results:
|
||||
files.extend(res.get("files", {}).get("video", []))
|
||||
if len(files) == 0:
|
||||
# no video files.
|
||||
return None
|
||||
name = browsertime_json.split(os.path.sep)[-2]
|
||||
result = []
|
||||
for file in files:
|
||||
video_url = url + "browsertime-results/" + name + "/" + file
|
||||
result.append({"browsertime_json_url": json_url,
|
||||
"video_url": video_url})
|
||||
return result
|
||||
reldir = _normalized_join("browsertime-results", name)
|
||||
|
||||
def _extract_metrics(res):
|
||||
# extracts the video files in one result and send back the
|
||||
# mapping expected by the visual metrics task
|
||||
vfiles = res.get("files", {}).get("video", [])
|
||||
return [{"json_location": _normalized_join(reldir, "browsertime.json"),
|
||||
"video_location": _normalized_join(reldir, vfile)}
|
||||
for vfile in vfiles]
|
||||
|
||||
vmetrics = []
|
||||
for res in browsertime_results:
|
||||
vmetrics.extend(_extract_metrics(res))
|
||||
|
||||
return len(vmetrics) > 0 and vmetrics or None
|
||||
|
||||
def summarize_and_output(self, test_config, tests, test_names):
|
||||
"""
|
||||
|
@ -508,7 +511,7 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
|
|||
raise
|
||||
|
||||
if not run_local:
|
||||
video_files = self._extract_vmetrics_jobs(test, bt_res_json, raw_btresults)
|
||||
video_files = self._extract_vmetrics(bt_res_json, raw_btresults)
|
||||
if video_files:
|
||||
video_jobs.extend(video_files)
|
||||
|
||||
|
@ -586,9 +589,10 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
|
|||
if not self.gecko_profile:
|
||||
validate_success = self._validate_treeherder_data(output, out_perfdata)
|
||||
|
||||
# Dumping the video list for the visual metrics task.
|
||||
# Dumping the video list for the visual metrics task at the root of
|
||||
# the browsertime results dir.
|
||||
if len(video_jobs) > 0:
|
||||
jobs_file = os.path.join(test_config["artifact_dir"], "jobs.json")
|
||||
jobs_file = os.path.join(self.result_dir(), "jobs.json")
|
||||
LOG.info("Writing %d video jobs into %s" % (len(video_jobs), jobs_file))
|
||||
with open(jobs_file, "w") as f:
|
||||
f.write(json.dumps({"jobs": video_jobs}))
|
||||
|
|
Loading…
Reference in a new issue