Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1612005 - Do not specify individual videos in jobs.json r=tarek,perftest-reviewers,sparky
The `run-visual-metrics.py` script was intended to consume a `jobs.json` file containing one entry, and hence one `browsertime.json` reference, per video. However, it was not being used that way: it kept re-processing the first video listed in the `browsertime.json` file. If a job was submitted whose `browsertime.json` referenced 15 different videos, only the first was ever processed. This gave us incorrect metrics, because the numbers were identical across all runs. Now we only specify the `browsertime.json` in the `jobs.json` file and extract the video paths from it. And because we never download inputs this way, we also get to remove some dead code and our dependency on `requests`.

Differential Revision: https://phabricator.services.mozilla.com/D62320

--HG--
extra : moz-landing-system : lando
Parent: 109249c95e
Commit: d9766a8b08
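For context, the change inverts the shape of the `jobs.json` payload the visual-metrics task consumes: instead of one entry per video, there is now one entry per `browsertime.json`, and the videos are discovered from that file. A minimal sketch of the two layouts, with hypothetical test names and paths:

# Illustrative payloads only; names and paths are made up.

# Before: one entry per video, each repeating the browsertime.json location.
old_jobs = {
    "jobs": [
        {
            "test_name": "amazon",
            "json_location": "browsertime-results/amazon/browsertime.json",
            "video_location": "browsertime-results/amazon/video/1.mp4",
        },
        # ... one more entry for every other video
    ]
}

# After: one entry per browsertime.json; video paths are read from the file itself.
new_jobs = {
    "jobs": [
        {
            "test_name": "amazon",
            "browsertime_json_path": "browsertime-results/amazon/browsertime.json",
        }
    ]
}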
@@ -1,17 +1,12 @@
 # Direct dependencies
 attrs==19.1.0 --hash=sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79
-requests==2.22.0 --hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31
 structlog==19.1.0 --hash=sha256:db441b81c65b0f104a7ce5d86c5432be099956b98b8a2c8be0b3fb3a7a0b1536
 voluptuous==0.11.5 --hash=sha256:303542b3fc07fb52ec3d7a1c614b329cdbee13a9d681935353d8ea56a7bfa9f1
 jsonschema==3.2.0 --hash=sha256:4e5b3cf8216f577bee9ce139cbe72eca3ea4f292ec60928ff24758ce626cd163
 
 # Transitive dependencies
-certifi==2019.6.16 --hash=sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939
-chardet==3.0.4 --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691
-idna==2.8 --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c
 importlib_metadata==1.1.0 --hash=sha256:e6ac600a142cf2db707b1998382cc7fc3b02befb7273876e01b8ad10b9652742
 more_itertools==8.0.0 --hash=sha256:a0ea684c39bc4315ba7aae406596ef191fd84f873d2d2751f84d64e81a7a2d45
 pyrsistent==0.15.6 --hash=sha256:f3b280d030afb652f79d67c5586157c5c1355c9a58dfc7940566e28d28f3df1b
 six==1.12.0 --hash=sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c
-urllib3==1.25.3 --hash=sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1
 zipp==0.6.0 --hash=sha256:f06903e9f1f43b12d371004b4ac7b06ab39a44adc747266928ae6debfa7b3335
@@ -10,27 +10,19 @@
 import argparse
 import os
 import json
 import shutil
 import sys
 import tarfile
-from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
+from concurrent.futures import ProcessPoolExecutor
 from functools import partial
 from multiprocessing import cpu_count
 from pathlib import Path
 
 import attr
-import requests
 import structlog
 import subprocess
 
 from jsonschema import validate
-from voluptuous import Required, Schema
-
-#: The workspace directory where files will be downloaded, etc.
-WORKSPACE_DIR = Path("/", "builds", "worker", "workspace")
-
-#: The directory where job artifacts will be stored.
-WORKSPACE_JOBS_DIR = WORKSPACE_DIR / "jobs"
+from voluptuous import ALLOW_EXTRA, Required, Schema
 
 #: The directory where artifacts from this job will be placed.
 OUTPUT_DIR = Path("/", "builds", "worker", "artifacts")
@@ -41,33 +33,18 @@ class Job:
     #: The name of the test.
     test_name = attr.ib(type=str)
 
-    #: The directory for all the files pertaining to the job.
-    job_dir = attr.ib(type=Path)
-
     #: json_path: The path to the ``browsertime.json`` file on disk.
     json_path = attr.ib(type=Path)
 
-    #: json_location: The location or URL of the ``browsertime.json`` file.
-    json_location = attr.ib(type=str)
-
     #: video_path: The path of the video file on disk.
     video_path = attr.ib(type=Path)
 
-    #: video_location: The path or URL of the video file.
-    video_location = attr.ib(type=str)
-
 
 # NB: Keep in sync with try_task_config_schema in
 # taskcluster/taskgraph.decision.py
 #: The schema for validating jobs.
 JOB_SCHEMA = Schema(
     {
         Required("jobs"): [
-            {
-                Required("test_name"): str,
-                Required("json_location"): str,
-                Required("video_location"): str,
-            }
+            {Required("test_name"): str, Required("browsertime_json_path"): str}
         ]
     }
 )
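As a usage sketch, the new schema rejects old-style entries outright; the payloads below are illustrative (voluptuous raises `MultipleInvalid` on both extra and missing keys):

from voluptuous import MultipleInvalid

good = {
    "jobs": [
        {
            "test_name": "amazon",
            "browsertime_json_path": "browsertime-results/amazon/browsertime.json",
        }
    ]
}
JOB_SCHEMA(good)  # validates and returns the payload

bad = {"jobs": [{"test_name": "amazon", "video_location": "video/1.mp4"}]}
try:
    JOB_SCHEMA(bad)
except MultipleInvalid as e:
    print(e)  # missing browsertime_json_path, extra video_location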
@@ -82,6 +59,11 @@ APP_SCHEMA = Schema(
     }
 )
 
+#: A partial schema for browsertime.json files.
+BROWSERTIME_SCHEMA = Schema(
+    [{Required("files"): {Required("video"): [str]}}], extra=ALLOW_EXTRA
+)
+
 PERFHERDER_SCHEMA = Path("/", "builds", "worker", "performance-artifact-schema.json")
 with PERFHERDER_SCHEMA.open() as f:
     PERFHERDER_SCHEMA = json.loads(f.read())
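`extra=ALLOW_EXTRA` is what makes this schema partial: real browsertime.json records carry far more keys (timings, statistics, metadata) than the task needs, and only `files.video` is enforced. A sketch with a trimmed, made-up record:

record = [
    {
        "info": {"url": "https://example.com/"},  # extra keys are tolerated
        "files": {"video": ["video/1.mp4", "video/2.mp4"]},
    }
]
BROWSERTIME_SCHEMA(record)  # passes: files.video is present and is a list of str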
@@ -245,25 +227,35 @@ def main(log, args):
     app_json_path = results_path / "browsertime-results" / "application.json"
     app_json = read_json(app_json_path, APP_SCHEMA)
 
-    try:
-        downloaded_jobs, failed_downloads = download_inputs(log, jobs_json["jobs"])
-    except Exception as e:
-        log.error("Failed to download jobs: %s" % e, exc_info=True)
-        return 1
+    jobs = []
+
+    for job in jobs_json["jobs"]:
+        browsertime_json_path = fetch_dir / job["browsertime_json_path"]
+        browsertime_json = read_json(browsertime_json_path, BROWSERTIME_SCHEMA)
+
+        for site in browsertime_json:
+            for video in site["files"]["video"]:
+                jobs.append(
+                    Job(
+                        test_name=job["test_name"],
+                        json_path=browsertime_json_path,
+                        video_path=browsertime_json_path.parent / video,
+                    )
+                )
 
     failed_runs = 0
     suites = {}
 
     with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
         for job, result in zip(
-            downloaded_jobs,
+            jobs,
             executor.map(
                 partial(
                     run_visual_metrics,
                     visualmetrics_path=visualmetrics_path,
                     options=args.visual_metrics_options,
                 ),
-                downloaded_jobs,
+                jobs,
             ),
         ):
             returncode, res = result
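This loop is the heart of the fix: every video of every site entry in the `browsertime.json` now becomes its own `Job`, where the old code processed only the first video over and over. A self-contained sketch of the fan-out, with stub data and hypothetical paths:

from pathlib import Path

# Stub browsertime.json contents: two site entries, two videos each.
stub_browsertime_json = [
    {"files": {"video": ["video/1.mp4", "video/2.mp4"]}},
    {"files": {"video": ["video/3.mp4", "video/4.mp4"]}},
]
stub_json_path = Path("fetches") / "browsertime-results" / "amazon" / "browsertime.json"

video_paths = [
    stub_json_path.parent / video
    for site in stub_browsertime_json
    for video in site["files"]["video"]
]
assert len(video_paths) == 4  # one visual-metrics run per video, not one in total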
@@ -304,166 +296,16 @@ def main(log, args):
     with Path(OUTPUT_DIR, "summary.json").open("w") as f:
         json.dump(
             {
-                "total_jobs": len(downloaded_jobs) + len(failed_downloads),
-                "successful_runs": len(downloaded_jobs) - failed_runs,
+                "total_jobs": len(jobs),
+                "successful_runs": len(jobs) - failed_runs,
                 "failed_runs": failed_runs,
-                "failed_downloads": [
-                    {
-                        "video_location": job.video_location,
-                        "json_location": job.json_location,
-                        "test_name": job.test_name,
-                    }
-                    for job in failed_downloads
-                ],
             },
             f,
         )
 
     # If there's one failure along the way, we want to return > 0
     # to trigger a red job in TC.
-    return len(failed_downloads) + failed_runs
-
-
-def download_inputs(log, raw_jobs):
-    """Download the inputs for all jobs in parallel.
-
-    Args:
-        log: The structlog logger instance.
-        raw_jobs: The list of unprocessed jobs from the ``jobs.json`` input file.
-
-    Returns:
-        A tuple of the successfully downloaded jobs and the failed to download jobs.
-    """
-    WORKSPACE_DIR.mkdir(parents=True, exist_ok=True)
-
-    pending_jobs = []
-    for i, job in enumerate(raw_jobs):
-        job_dir = WORKSPACE_JOBS_DIR / str(i)
-        job_dir.mkdir(parents=True, exist_ok=True)
-        pending_jobs.append(
-            Job(
-                job["test_name"],
-                job_dir,
-                job_dir / "browsertime.json",
-                job["json_location"],
-                job_dir / "video",
-                job["video_location"],
-            )
-        )
-
-    downloaded_jobs = []
-    failed_jobs = []
-
-    with ThreadPoolExecutor(max_workers=8) as executor:
-        for job, success in executor.map(partial(download_job, log), pending_jobs):
-            if success:
-                downloaded_jobs.append(job)
-            else:
-                job.job_dir.rmdir()
-                failed_jobs.append(job)
-
-    return downloaded_jobs, failed_jobs
-
-
-def download_job(log, job):
-    """Download the files for a given job.
-
-    Args:
-        log: The structlog logger instance.
-        job: The job to download.
-
-    Returns:
-        A tuple of the job and whether or not the download was successful.
-
-        The returned job will be updated so that it's :attr:`Job.video_path`
-        attribute is updated to match the file path given by the video file
-        in the ``browsertime.json`` file.
-    """
-    fetch_dir = Path(os.getenv("MOZ_FETCHES_DIR"))
-    log = log.bind(json_location=job.json_location)
-    try:
-        download_or_copy(fetch_dir / job.video_location, job.video_path)
-        download_or_copy(fetch_dir / job.json_location, job.json_path)
-    except Exception as e:
-        log.error(
-            "Failed to download files for job: %s" % e,
-            video_location=job.video_location,
-            exc_info=True,
-        )
-        return job, False
-
-    try:
-        with job.json_path.open("r", encoding="utf-8") as f:
-            browsertime_json = json.load(f)
-    except OSError as e:
-        log.error("Could not read browsertime.json: %s" % e)
-        return job, False
-    except ValueError as e:
-        log.error("Could not parse browsertime.json as JSON: %s" % e)
-        return job, False
-
-    try:
-        video_path = job.job_dir / browsertime_json[0]["files"]["video"][0]
-    except KeyError:
-        log.error("Could not read video path from browsertime.json file")
-        return job, False
-
-    video_path.parent.mkdir(parents=True, exist_ok=True)
-
-    job.video_path.rename(video_path)
-    job.video_path = video_path
-
-    return job, True
-
-
-def download_or_copy(url_or_location, path):
-    """Download the resource at the given URL or path to the local path.
-
-    Args:
-        url_or_location: The URL or path of the resource to download or copy.
-        path: The local path to download or copy the resource to.
-
-    Raises:
-        OSError:
-            Raised if an IO error occurs while writing the file.
-
-        requests.exceptions.HTTPError:
-            Raised when an HTTP error (including e.g., HTTP 404) occurs.
-    """
-    url_or_location = str(url_or_location)
-    if os.path.exists(url_or_location):
-        shutil.copyfile(url_or_location, str(path))
-        return
-    elif not url_or_location.startswith("http"):
-        raise IOError(
-            "%s does not seem to be an URL or an existing file" % url_or_location
-        )
-
-    download(url_or_location, path)
-
-
-def download(url, path):
-    """Download the resource at the given URL to the local path.
-
-    Args:
-        url: The URL of the resource to download.
-        path: The local path to download the resource to.
-
-    Raises:
-        OSError:
-            Raised if an IO error occurs while writing the file.
-
-        requests.exceptions.HTTPError:
-            Raised when an HTTP error (including e.g., HTTP 404) occurs.
-    """
-    request = requests.get(url, stream=True)
-    request.raise_for_status()
-
-    path.parent.mkdir(parents=True, exist_ok=True)
-
-    with path.open("wb") as f:
-        for chunk in request:
-            f.write(chunk)
+    return failed_runs
 
 
 def run_visual_metrics(job, visualmetrics_path, options):
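With the download bookkeeping gone, `main` now returns just the failed-run count, and the comment above explains why: any non-zero exit turns the Taskcluster job red. A minimal, self-contained sketch of that pattern (not the script's verbatim entry point, which is outside this diff):

import sys

def main():
    failed_runs = 2  # pretend two videos failed to process
    return failed_runs  # anything > 0 marks the task as failed

if __name__ == "__main__":
    sys.exit(main())  # process exits with status 2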
@@ -484,7 +484,7 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
 
         return results
 
-    def _extract_vmetrics(self, test_name, browsertime_json, browsertime_results):
+    def _extract_vmetrics(self, test_name, browsertime_json):
         # The visual metrics task expects posix paths.
         def _normalized_join(*args):
             path = os.path.join(*args)
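The body of `_normalized_join` is truncated in this view. Given the comment, a plausible completion joins with the host separator and then forces forward slashes; this is an assumption, not the verbatim source:

import os

def _normalized_join(*args):
    # Join using the host's separator, then normalize to posix-style
    # slashes, since the visual metrics task expects posix paths.
    path = os.path.join(*args)
    return path.replace(os.path.sep, "/")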
@@ -493,24 +493,10 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
         name = browsertime_json.split(os.path.sep)[-2]
         reldir = _normalized_join("browsertime-results", name)
 
-        def _extract_metrics(res):
-            # extracts the video files in one result and send back the
-            # mapping expected by the visual metrics task
-            vfiles = res.get("files", {}).get("video", [])
-            return [
-                {
-                    "json_location": _normalized_join(reldir, "browsertime.json"),
-                    "video_location": _normalized_join(reldir, vfile),
-                    "test_name": test_name,
-                }
-                for vfile in vfiles
-            ]
-
-        vmetrics = []
-        for res in browsertime_results:
-            vmetrics.extend(_extract_metrics(res))
-
-        return len(vmetrics) > 0 and vmetrics or None
+        return {
+            "browsertime_json_path": _normalized_join(reldir, "browsertime.json"),
+            "test_name": test_name,
+        }
 
     def summarize_and_output(self, test_config, tests, test_names):
         """
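Worked through with a hypothetical results path (and reusing the `_normalized_join` sketch above), the new return value lines up exactly with one entry of `JOB_SCHEMA`'s `jobs` list:

import os

def _normalized_join(*args):
    return os.path.join(*args).replace(os.path.sep, "/")

# Hypothetical on-disk location of one result file:
browsertime_json = os.path.join("output", "browsertime-results", "amazon", "browsertime.json")
name = browsertime_json.split(os.path.sep)[-2]          # "amazon"
reldir = _normalized_join("browsertime-results", name)  # "browsertime-results/amazon"

entry = {
    "browsertime_json_path": _normalized_join(reldir, "browsertime.json"),
    "test_name": "amazon",
}
# entry == {"browsertime_json_path": "browsertime-results/amazon/browsertime.json",
#           "test_name": "amazon"}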
@@ -564,11 +550,7 @@ class BrowsertimeResultsHandler(PerftestResultsHandler):
                 raise
 
             if not run_local:
-                video_files = self._extract_vmetrics(
-                    test_name, bt_res_json, raw_btresults
-                )
-                if video_files:
-                    video_jobs.extend(video_files)
+                video_jobs.append(self._extract_vmetrics(test_name, bt_res_json))
 
             for new_result in self.parse_browsertime_json(
                 raw_btresults,
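Each appended mapping becomes one element of the `jobs` list that the visual-metrics task later validates against `JOB_SCHEMA`. The serialization step is outside this diff, but presumably amounts to something like:

import json

video_jobs = [
    {
        "browsertime_json_path": "browsertime-results/amazon/browsertime.json",
        "test_name": "amazon",
    }
]
with open("jobs.json", "w") as f:
    json.dump({"jobs": video_jobs}, f)  # satisfies Required("jobs") in JOB_SCHEMA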