зеркало из https://github.com/mozilla/treeherder.git
New misc script to compare pushes between Treeherder instances (#6089)
Renamed `compare_pushes.py` to `compare_tasks.py` since that name is more appropriate. The new `compare_pushes.py` compares the last 50 pushes of various projects between different Treeherder instances. The output includes links to each instance and revision so they can be compared visually. ```console % ./misc/compare_pushes.py --projects android-components,fenix,reference-browser,servo-master,servo-auto,servo-try Comparing android-components against production. Comparing fenix against production. Comparing reference-browser against production. {"values_changed": {"root['push_timestamp']": {"new_value": 1582580346, "old_value": 1582581477}}} https://treeherder.allizom.org/#/jobs?repo=reference-browser&revision=547a18b97534b237fa87bd22650f342836014c4e https://treeherder.mozilla.org/#/jobs?repo=reference-browser&revision=547a18b97534b237fa87bd22650f342836014c4e Comparing servo-master against production. Comparing servo-auto against production. Comparing servo-try against production. ```
This commit is contained in:
Родитель
a1d89b7657
Коммит
607727c76f
|
@ -1,18 +1,18 @@
|
|||
#!/usr/bin/env python
|
||||
""" Script to compare two pushes from different Treeherder instances"""
|
||||
""" Script to compare pushes from a Treeherder instance against production.
|
||||
|
||||
This is useful to compare if pushes between two different instances have been
|
||||
ingested differently.
|
||||
"""
|
||||
import argparse
|
||||
import logging
|
||||
import pprint
|
||||
import uuid
|
||||
|
||||
import slugid
|
||||
|
||||
from deepdiff import DeepDiff
|
||||
from thclient import TreeherderClient
|
||||
|
||||
logging.basicConfig()
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
HOSTS = {
|
||||
"localhost": "http://localhost:8000",
|
||||
"stage": "https://treeherder.allizom.org",
|
||||
|
@ -20,97 +20,54 @@ HOSTS = {
|
|||
}
|
||||
|
||||
|
||||
def remove_some_attributes(job, production_job):
|
||||
# I belive these differences are expected since they are dependant to when the data
|
||||
# was inserted inside of the database
|
||||
del job["build_platform_id"]
|
||||
del job["id"]
|
||||
del job["job_group_id"]
|
||||
del job["job_type_id"]
|
||||
del job["last_modified"]
|
||||
del job["push_id"]
|
||||
del job["result_set_id"]
|
||||
del production_job["build_platform_id"]
|
||||
del production_job["id"]
|
||||
del production_job["job_group_id"]
|
||||
del production_job["job_type_id"]
|
||||
del production_job["last_modified"]
|
||||
del production_job["push_id"]
|
||||
del production_job["result_set_id"]
|
||||
def main(args):
|
||||
compare_to_client = TreeherderClient(server_url=HOSTS[args.host])
|
||||
production_client = TreeherderClient(server_url=HOSTS["production"])
|
||||
|
||||
if job.get("end_timestamp"):
|
||||
del job["end_timestamp"]
|
||||
del job["start_timestamp"]
|
||||
del production_job["end_timestamp"]
|
||||
del production_job["start_timestamp"]
|
||||
# Support comma separated projects
|
||||
projects = args.projects.split(',')
|
||||
for _project in projects:
|
||||
logger.info("Comparing {} against production.".format(_project))
|
||||
# Remove properties that are irrelevant for the comparison
|
||||
pushes = compare_to_client.get_pushes(_project, count=50)
|
||||
for _push in sorted(pushes, key=lambda push: push["revision"]):
|
||||
del _push["id"]
|
||||
for _rev in _push["revisions"]:
|
||||
del _rev["result_set_id"]
|
||||
|
||||
if job.get("failure_classification_id"):
|
||||
del job["failure_classification_id"]
|
||||
del production_job["failure_classification_id"]
|
||||
production_pushes = production_client.get_pushes(_project, count=50)
|
||||
for _push in sorted(production_pushes, key=lambda push: push["revision"]):
|
||||
del _push["id"]
|
||||
for _rev in _push["revisions"]:
|
||||
del _rev["result_set_id"]
|
||||
|
||||
for index in range(0, len(pushes)):
|
||||
assert pushes[index]["revision"] == production_pushes[index]["revision"]
|
||||
difference = DeepDiff(pushes[index], production_pushes[index])
|
||||
if difference:
|
||||
logger.info(difference.to_json())
|
||||
logger.info("{}/#/jobs?repo={}&revision={}".format(
|
||||
compare_to_client.server_url,
|
||||
_project,
|
||||
pushes[index]["revision"]))
|
||||
logger.info("{}/#/jobs?repo={}&revision={}".format(
|
||||
production_client.server_url,
|
||||
_project,
|
||||
production_pushes[index]["revision"]))
|
||||
|
||||
|
||||
def print_url_to_taskcluster(job_guid):
|
||||
job_guid = job["job_guid"]
|
||||
(decoded_task_id, _) = job_guid.split("/")
|
||||
# As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
|
||||
taskId = slugid.encode(uuid.UUID(decoded_task_id))
|
||||
logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", taskId)
|
||||
def get_args():
    """Parse the command line and return the resulting namespace.

    Two options are recognised: ``--host`` (defaults to ``stage``) and
    ``--projects``, a comma separated list that defaults to
    ``android-components,fenix``.
    """
    cli = argparse.ArgumentParser("Compare a push from a Treeherder instance to the production instance.")
    # (flag, default, help text) for every supported option.
    options = (
        ("--host", "stage", "Host to compare. It defaults to stage"),
        (
            "--projects",
            "android-components,fenix",
            "Projects (comma separated) to compare. It defaults to android-components & fenix",
        ),
    )
    for flag, default_value, help_text in options:
        cli.add_argument(flag, default=default_value, help=help_text)

    return cli.parse_args()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser("Compare a push from a Treeherder instance to the production instance.")
|
||||
parser.add_argument("--host", default="localhost",
|
||||
help="Host to compare. It defaults to localhost")
|
||||
parser.add_argument("--revision", required=True,
|
||||
help="Revision to compare")
|
||||
parser.add_argument("--project", default="mozilla-central",
|
||||
help="Project to compare. It defaults to mozilla-central")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
th_instance = TreeherderClient(server_url=HOSTS[args.host])
|
||||
th_instance_pushid = th_instance.get_pushes(args.project, revision=args.revision)[0]["id"]
|
||||
th_instance_jobs = th_instance.get_jobs(args.project, push_id=th_instance_pushid, count=None) or []
|
||||
|
||||
production = TreeherderClient(server_url=HOSTS["production"])
|
||||
production_pushid = production.get_pushes(args.project, revision=args.revision)[0]["id"]
|
||||
production_jobs = production.get_jobs(args.project, push_id=production_pushid, count=None)
|
||||
|
||||
production_dict = {}
|
||||
for job in production_jobs:
|
||||
production_dict[job["job_guid"]] = job
|
||||
|
||||
th_instance_dict = {}
|
||||
th_instance_not_found = []
|
||||
for job in th_instance_jobs:
|
||||
production_job = production_dict.get(job["job_guid"])
|
||||
if production_job is None:
|
||||
th_instance_not_found.append(job)
|
||||
else:
|
||||
# You can use this value in a url with &selectedJob=
|
||||
jobId = job["id"]
|
||||
remove_some_attributes(job, production_job)
|
||||
|
||||
differences = DeepDiff(job, production_dict[job["job_guid"]])
|
||||
if differences:
|
||||
pprint.pprint(differences)
|
||||
logger.info(jobId)
|
||||
else:
|
||||
# Delete jobs that don"t have any differences
|
||||
del production_dict[job["job_guid"]]
|
||||
|
||||
logger.info("We have found: %s jobs on %s instance.", len(th_instance_jobs), args.host)
|
||||
logger.info("We have found: %s jobs on the production instance.", len(production_jobs))
|
||||
|
||||
if production_dict:
|
||||
logger.info("There are the first 10 production jobs we do not have th_instancely. Follow the link to investigate.")
|
||||
for job in list(production_dict.values())[0:10]:
|
||||
print_url_to_taskcluster(job["job_guid"])
|
||||
|
||||
if th_instance_not_found:
|
||||
logger.info("Number of jobs not found th_instancely: %s jobs", len(th_instance_not_found))
|
||||
for job in th_instance_not_found:
|
||||
print_url_to_taskcluster(job["job_guid"])
|
||||
|
||||
if production_dict is None and th_instance_not_found is None:
|
||||
logger.info("We have not found any differences between the two pushes!! :D")
|
||||
main(get_args())
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
#!/usr/bin/env python
|
||||
""" Script to compare tasks from pushes on different Treeherder instances"""
|
||||
import argparse
|
||||
import logging
|
||||
import pprint
|
||||
import uuid
|
||||
|
||||
import slugid
|
||||
|
||||
from deepdiff import DeepDiff
|
||||
from thclient import TreeherderClient
|
||||
|
||||
# Install the default root handler and let every record down to DEBUG through.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)

# Base URL of each known Treeherder instance, keyed by the name given to --host.
HOSTS = dict(
    localhost="http://localhost:8000",
    stage="https://treeherder.allizom.org",
    production="https://treeherder.mozilla.org",
)
|
||||
|
||||
|
||||
def remove_some_attributes(job, production_job):
    """Strip fields that are expected to differ between two instances.

    Both dictionaries are modified in place so that a later comparison of the
    two only reports meaningful differences.

    Args:
        job: Job dictionary from the instance being compared.
        production_job: The matching job dictionary from production.
    """
    # I believe these differences are expected since they depend on when the
    # data was inserted into the database.
    instance_specific_keys = (
        "build_platform_id",
        "id",
        "job_group_id",
        "job_type_id",
        "last_modified",
        "push_id",
        "result_set_id",
    )
    for key in instance_specific_keys:
        # pop() with a default tolerates a field missing on either side,
        # where a bare `del` would raise KeyError.
        job.pop(key, None)
        production_job.pop(key, None)

    # The fields below are only dropped when `job` carries a truthy value for
    # the guarding key; otherwise they are left in place on both sides.
    if job.get("end_timestamp"):
        for key in ("end_timestamp", "start_timestamp"):
            job.pop(key, None)
            production_job.pop(key, None)

    if job.get("failure_classification_id"):
        job.pop("failure_classification_id", None)
        production_job.pop("failure_classification_id", None)
|
||||
|
||||
|
||||
def print_url_to_taskcluster(job_guid):
    """Log the Taskcluster UI link for the task behind a job GUID.

    Args:
        job_guid: String made of a task UUID and a second part separated by
            a single ``/``; the second part is ignored.

    Raises:
        ValueError: if ``job_guid`` does not split into exactly two parts on
            ``/`` or the first part is not a valid UUID.
    """
    # Use the argument itself; the previous version overwrote it with the
    # module-level `job` variable and therefore ignored what the caller passed.
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string not bytestring under Python 3.
    taskId = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", taskId)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Compare the jobs of a single revision between the instance selected
    # with --host and the production instance.
    parser = argparse.ArgumentParser("Compare a push from a Treeherder instance to the production instance.")
    parser.add_argument("--host", default="localhost",
                        help="Host to compare. It defaults to localhost")
    parser.add_argument("--revision", required=True,
                        help="Revision to compare")
    parser.add_argument("--project", default="mozilla-central",
                        help="Project to compare. It defaults to mozilla-central")

    args = parser.parse_args()

    th_instance = TreeherderClient(server_url=HOSTS[args.host])
    th_instance_pushid = th_instance.get_pushes(args.project, revision=args.revision)[0]["id"]
    th_instance_jobs = th_instance.get_jobs(args.project, push_id=th_instance_pushid, count=None) or []

    production = TreeherderClient(server_url=HOSTS["production"])
    production_pushid = production.get_pushes(args.project, revision=args.revision)[0]["id"]
    # Same `or []` guard as for the compared instance, so an empty result
    # cannot break the loops and len() calls below.
    production_jobs = production.get_jobs(args.project, push_id=production_pushid, count=None) or []

    # Index production jobs by GUID. Entries are removed as identical jobs are
    # matched, so whatever remains at the end either differs or was never seen
    # on the compared instance.
    production_dict = {job["job_guid"]: job for job in production_jobs}

    th_instance_not_found = []
    for job in th_instance_jobs:
        production_job = production_dict.get(job["job_guid"])
        if production_job is None:
            th_instance_not_found.append(job)
        else:
            # You can use this value in a url with &selectedJob=
            # Captured now because remove_some_attributes() deletes "id".
            jobId = job["id"]
            remove_some_attributes(job, production_job)

            differences = DeepDiff(job, production_job)
            if differences:
                pprint.pprint(differences)
                logger.info(jobId)
            else:
                # Delete jobs that don't have any differences
                del production_dict[job["job_guid"]]

    logger.info("We have found: %s jobs on %s instance.", len(th_instance_jobs), args.host)
    logger.info("We have found: %s jobs on the production instance.", len(production_jobs))

    if production_dict:
        logger.info("There are the first 10 production jobs we do not have th_instancely. Follow the link to investigate.")
        for job in list(production_dict.values())[0:10]:
            print_url_to_taskcluster(job["job_guid"])

    if th_instance_not_found:
        logger.info("Number of jobs not found th_instancely: %s jobs", len(th_instance_not_found))
        for job in th_instance_not_found:
            print_url_to_taskcluster(job["job_guid"])

    # Both containers always exist, so test for emptiness; the previous
    # `is None` checks could never succeed and this message was unreachable.
    if not production_dict and not th_instance_not_found:
        logger.info("We have not found any differences between the two pushes!! :D")
|
Загрузка…
Ссылка в новой задаче