New misc script to compare pushes between Treeherder instances (#6089)

Renamed `compare_pushes.py` to `compare_tasks.py` since that name is more appropriate.

The new `compare_pushes.py` compares the last 50 pushes of each requested project between a chosen Treeherder instance (stage by default) and production.
For pushes that differ, the output prints links to the revision on each instance so they can be compared visually.

```console
% ./misc/compare_pushes.py --projects android-components,fenix,reference-browser,servo-master,servo-auto,servo-try
Comparing android-components against production.
Comparing fenix against production.
Comparing reference-browser against production.
{"values_changed": {"root['push_timestamp']": {"new_value": 1582580346, "old_value": 1582581477}}}
https://treeherder.allizom.org/#/jobs?repo=reference-browser&revision=547a18b97534b237fa87bd22650f342836014c4e
https://treeherder.mozilla.org/#/jobs?repo=reference-browser&revision=547a18b97534b237fa87bd22650f342836014c4e
Comparing servo-master against production.
Comparing servo-auto against production.
Comparing servo-try against production.
```
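The JSON line in the example above is DeepDiff output. As a minimal sketch of how such a difference is produced (the dictionaries below are made-up fragments that reuse the timestamps from the output above; they are not real push payloads):

```python
from deepdiff import DeepDiff

# Hypothetical fragments of the same push as returned by two instances
stage_push = {"push_timestamp": 1582581477}
production_push = {"push_timestamp": 1582580346}

# DeepDiff(t1, t2) reports t1's value as old_value and t2's as new_value
print(DeepDiff(stage_push, production_push).to_json())
# -> {"values_changed": {"root['push_timestamp']": {"new_value": 1582580346, "old_value": 1582581477}}}
```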
Armen Zambrano 2020-03-05 13:41:18 -05:00, committed by GitHub
Parent a1d89b7657
Commit 607727c76f
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
2 changed files: 166 additions and 93 deletions

misc/compare_pushes.py (modified)

#!/usr/bin/env python
""" Script to compare pushes from a Treeherder instance against production.

This is useful to compare if pushes between two different instances have been
ingested differently.
"""
import argparse
import logging

from deepdiff import DeepDiff
from thclient import TreeherderClient

logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

HOSTS = {
    "localhost": "http://localhost:8000",
    "stage": "https://treeherder.allizom.org",
    "production": "https://treeherder.mozilla.org"
}


def main(args):
    compare_to_client = TreeherderClient(server_url=HOSTS[args.host])
    production_client = TreeherderClient(server_url=HOSTS["production"])

    # Support comma separated projects
    projects = args.projects.split(',')

    for _project in projects:
        logger.info("Comparing {} against production.".format(_project))

        # Remove properties that are irrelevant for the comparison
        pushes = compare_to_client.get_pushes(_project, count=50)
        for _push in sorted(pushes, key=lambda push: push["revision"]):
            del _push["id"]
            for _rev in _push["revisions"]:
                del _rev["result_set_id"]

        production_pushes = production_client.get_pushes(_project, count=50)
        for _push in sorted(production_pushes, key=lambda push: push["revision"]):
            del _push["id"]
            for _rev in _push["revisions"]:
                del _rev["result_set_id"]

        for index in range(0, len(pushes)):
            assert pushes[index]["revision"] == production_pushes[index]["revision"]
            difference = DeepDiff(pushes[index], production_pushes[index])
            if difference:
                logger.info(difference.to_json())
                logger.info("{}/#/jobs?repo={}&revision={}".format(
                    compare_to_client.server_url,
                    _project,
                    pushes[index]["revision"]))
                logger.info("{}/#/jobs?repo={}&revision={}".format(
                    production_client.server_url,
                    _project,
                    production_pushes[index]["revision"]))


def get_args():
    parser = argparse.ArgumentParser("Compare a push from a Treeherder instance to the production instance.")
    parser.add_argument("--host",
                        default="stage",
                        help="Host to compare. It defaults to stage")
    parser.add_argument("--projects",
                        default="android-components,fenix",
                        help="Projects (comma separated) to compare. It defaults to android-components & fenix")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    main(get_args())
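As a usage sketch (not part of the commit), the same comparison could presumably be run against a local development instance instead of stage, assuming a Treeherder server is listening on localhost:8000 as listed in HOSTS:

```console
% ./misc/compare_pushes.py --host localhost --projects android-components,fenix
```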

misc/compare_tasks.py (new executable file, 116 lines)
#!/usr/bin/env python
""" Script to compare tasks from pushes on different Treeherder instances"""
import argparse
import logging
import pprint
import uuid

import slugid
from deepdiff import DeepDiff
from thclient import TreeherderClient

logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)

HOSTS = {
    "localhost": "http://localhost:8000",
    "stage": "https://treeherder.allizom.org",
    "production": "https://treeherder.mozilla.org"
}


def remove_some_attributes(job, production_job):
    # I believe these differences are expected since they depend on when the data
    # was inserted into the database
    del job["build_platform_id"]
    del job["id"]
    del job["job_group_id"]
    del job["job_type_id"]
    del job["last_modified"]
    del job["push_id"]
    del job["result_set_id"]
    del production_job["build_platform_id"]
    del production_job["id"]
    del production_job["job_group_id"]
    del production_job["job_type_id"]
    del production_job["last_modified"]
    del production_job["push_id"]
    del production_job["result_set_id"]

    if job.get("end_timestamp"):
        del job["end_timestamp"]
        del job["start_timestamp"]
        del production_job["end_timestamp"]
        del production_job["start_timestamp"]

    if job.get("failure_classification_id"):
        del job["failure_classification_id"]
        del production_job["failure_classification_id"]


def print_url_to_taskcluster(job_guid):
    (decoded_task_id, _) = job_guid.split("/")
    # As of slugid v2, slugid.encode() returns a string, not a bytestring, under Python 3.
    taskId = slugid.encode(uuid.UUID(decoded_task_id))
    logger.info("https://taskcluster-ui.herokuapp.com/tasks/%s", taskId)


if __name__ == "__main__":
    parser = argparse.ArgumentParser("Compare a push from a Treeherder instance to the production instance.")
    parser.add_argument("--host", default="localhost",
                        help="Host to compare. It defaults to localhost")
    parser.add_argument("--revision", required=True,
                        help="Revision to compare")
    parser.add_argument("--project", default="mozilla-central",
                        help="Project to compare. It defaults to mozilla-central")
    args = parser.parse_args()

    th_instance = TreeherderClient(server_url=HOSTS[args.host])
    th_instance_pushid = th_instance.get_pushes(args.project, revision=args.revision)[0]["id"]
    th_instance_jobs = th_instance.get_jobs(args.project, push_id=th_instance_pushid, count=None) or []

    production = TreeherderClient(server_url=HOSTS["production"])
    production_pushid = production.get_pushes(args.project, revision=args.revision)[0]["id"]
    production_jobs = production.get_jobs(args.project, push_id=production_pushid, count=None)

    production_dict = {}
    for job in production_jobs:
        production_dict[job["job_guid"]] = job

    th_instance_dict = {}
    th_instance_not_found = []
    for job in th_instance_jobs:
        production_job = production_dict.get(job["job_guid"])
        if production_job is None:
            th_instance_not_found.append(job)
        else:
            # You can use this value in a url with &selectedJob=
            jobId = job["id"]
            remove_some_attributes(job, production_job)

            differences = DeepDiff(job, production_dict[job["job_guid"]])
            if differences:
                pprint.pprint(differences)
                logger.info(jobId)
            else:
                # Delete jobs that don't have any differences
                del production_dict[job["job_guid"]]

    logger.info("We have found: %s jobs on the %s instance.", len(th_instance_jobs), args.host)
    logger.info("We have found: %s jobs on the production instance.", len(production_jobs))

    if production_dict:
        logger.info("These are the first 10 production jobs not found on the %s instance. "
                    "Follow the links to investigate.", args.host)
        for job in list(production_dict.values())[0:10]:
            print_url_to_taskcluster(job["job_guid"])

    if th_instance_not_found:
        logger.info("Number of jobs not found on the %s instance: %s jobs", args.host, len(th_instance_not_found))
        for job in th_instance_not_found:
            print_url_to_taskcluster(job["job_guid"])

    if not production_dict and not th_instance_not_found:
        logger.info("We have not found any differences between the two pushes!! :D")