зеркало из https://github.com/mozilla/bugbug.git
Generate Phabricator revisions DB similarly to bugs DB
This way the scripts using Phabricator revisions don't have to redownload revisions from Phabricator every time they run.
This commit is contained in:
Родитель
fc210ad23b
Коммит
f84945f86f
|
@ -3,9 +3,22 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from typing import Collection, Iterator, List, NewType
|
||||
|
||||
from libmozdata.phabricator import PhabricatorAPI
|
||||
from tqdm import tqdm
|
||||
|
||||
from bugbug import db
|
||||
|
||||
RevisionDict = NewType("RevisionDict", dict)
|
||||
|
||||
REVISIONS_DB = "data/revisions.json"
|
||||
db.register(
|
||||
REVISIONS_DB,
|
||||
"https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_revisions.latest/artifacts/public/revisions.json.zst",
|
||||
1,
|
||||
)
|
||||
|
||||
PHABRICATOR_API = None
|
||||
|
||||
TESTING_PROJECTS = {
|
||||
|
@ -17,38 +30,54 @@ TESTING_PROJECTS = {
|
|||
}
|
||||
|
||||
|
||||
def set_api_key(url, api_key):
|
||||
def get_revisions() -> Iterator[RevisionDict]:
    """Yield, one by one, the revisions that `db.read` returns for `REVISIONS_DB`."""
    for revision in db.read(REVISIONS_DB):
        yield revision
|
||||
|
||||
|
||||
def set_api_key(url: str, api_key: str) -> None:
    """Create the module-wide Phabricator client.

    Builds a `PhabricatorAPI` from `api_key` and `url` and stores it in the
    `PHABRICATOR_API` global, which the request helpers of this module use.
    """
    global PHABRICATOR_API

    client = PhabricatorAPI(api_key, url)
    PHABRICATOR_API = client
|
||||
|
||||
|
||||
def get(rev_ids: Collection[int]) -> Collection[RevisionDict]:
    """Fetch the given revisions from Phabricator.

    Issues one `differential.revision.search` request constrained to
    `rev_ids`, asking for the `projects` attachment, and returns the `data`
    field of the response.

    `set_api_key` must have been called first, otherwise the assertion fails.
    """
    # NOTE(review): only one request is made and no result paging is handled
    # here; confirm callers keep `rev_ids` within what a single search call
    # returns (`download_revisions` passes groups of at most 100 IDs).
    assert PHABRICATOR_API is not None

    search_constraints = {
        "ids": rev_ids,
    }
    response = PHABRICATOR_API.request(
        "differential.revision.search",
        constraints=search_constraints,
        attachments={"projects": True},
    )

    return response["data"]
|
||||
|
||||
with tqdm(total=len(rev_ids)) as progress_bar:
|
||||
|
||||
def download_revisions(rev_ids: Collection[int]) -> None:
    """Download the revisions missing from the local DB and append them to it.

    IDs from `rev_ids` that are already present in `REVISIONS_DB` are skipped.
    The remaining ones are fetched through `get` in sorted groups of at most
    100 IDs, and each fetched group is appended to `REVISIONS_DB`.
    """
    known_count = 0
    pending_ids = {int(rev_id) for rev_id in rev_ids}

    # Walk the whole local DB once: count what is there and drop from the
    # pending set every ID that does not need to be downloaded again.
    for known_revision in get_revisions():
        known_count += 1
        pending_ids.discard(known_revision["id"])

    print(f"Loaded {known_count} revisions.")

    ordered_ids = sorted(pending_ids)
    total = len(ordered_ids)

    with tqdm(total=total) as progress_bar:
        for start in range(0, total, 100):
            batch = ordered_ids[start : start + 100]

            fetched = get(batch)

            progress_bar.update(len(batch))

            db.append(REVISIONS_DB, fetched)
|
||||
|
||||
|
||||
def get_testing_projects(rev):
|
||||
def get_testing_projects(rev: RevisionDict) -> List[str]:
|
||||
return [
|
||||
TESTING_PROJECTS[projectPHID]
|
||||
for projectPHID in rev["attachments"]["projects"]["projectPHIDs"]
|
||||
|
|
|
@ -194,6 +194,46 @@ tasks:
|
|||
owner: release-mgmt-analysis@mozilla.com
|
||||
source: ${repository}/raw/master/data-pipeline.yml
|
||||
|
||||
- ID: revisions-retrieval
|
||||
created: {$fromNow: ''}
|
||||
deadline: {$fromNow: '2 days'}
|
||||
expires: {$fromNow: '1 month'}
|
||||
provisionerId: proj-bugbug
|
||||
workerType: batch
|
||||
dependencies:
|
||||
- commit-retrieval
|
||||
payload:
|
||||
env:
|
||||
TC_SECRET_ID: project/bugbug/production
|
||||
maxRunTime: 86400
|
||||
image: mozilla/bugbug-base:${version}
|
||||
command:
|
||||
- "bugbug-data-revisions"
|
||||
|
||||
artifacts:
|
||||
public/revisions.json.zst:
|
||||
path: /data/revisions.json.zst
|
||||
type: file
|
||||
public/revisions.json.version:
|
||||
path: /data/revisions.json.version
|
||||
type: file
|
||||
|
||||
features:
|
||||
taskclusterProxy:
|
||||
true
|
||||
scopes:
|
||||
- "secrets:get:project/bugbug/production"
|
||||
routes:
|
||||
- notify.email.release-mgmt-analysis@mozilla.com.on-failed
|
||||
- notify.irc-channel.#bugbug.on-failed
|
||||
- index.project.bugbug.data_revisions.${version}
|
||||
- index.project.bugbug.data_revisions.latest
|
||||
metadata:
|
||||
name: bugbug revisions retrieval
|
||||
description: bugbug revisions retrieval
|
||||
owner: release-mgmt-analysis@mozilla.com
|
||||
source: ${repository}/raw/master/data-pipeline.yml
|
||||
|
||||
- ID: test-label-scheduling-history-push_data-retrieval
|
||||
created: {$fromNow: ''}
|
||||
deadline: {$fromNow: '3 days'}
|
||||
|
@ -1348,6 +1388,7 @@ tasks:
|
|||
expires: {$fromNow: '1 month'}
|
||||
dependencies:
|
||||
- past-bugs-by-unit
|
||||
- revisions-retrieval
|
||||
scopes:
|
||||
- hooks:modify-hook:project-bugbug/bugbug-landings-risk-report
|
||||
- assume:hook-id:project-bugbug/bugbug-landings-risk-report
|
||||
|
|
|
@ -67,6 +67,9 @@ class LandingsRiskReportGenerator(object):
|
|||
rev_start="children({})".format(commit["node"]),
|
||||
)
|
||||
|
||||
logger.info("Downloading revisions database...")
|
||||
assert db.download(phabricator.REVISIONS_DB)
|
||||
|
||||
logger.info("Downloading bugs database...")
|
||||
assert db.download(bugzilla.BUGS_DB)
|
||||
|
||||
|
@ -154,10 +157,18 @@ class LandingsRiskReportGenerator(object):
|
|||
}
|
||||
|
||||
logger.info("Retrieve Phabricator revisions linked to commits...")
|
||||
revisions = list(
|
||||
revision_ids = set(
|
||||
filter(None, (repository.get_revision_id(commit) for commit in commits))
|
||||
)
|
||||
revision_map = phabricator.get(revisions)
|
||||
|
||||
logger.info("Download revisions of interest...")
|
||||
phabricator.download_revisions(revision_ids)
|
||||
|
||||
revision_map = {
|
||||
revision["id"]: revision
|
||||
for revision in phabricator.get_revisions()
|
||||
if revision["id"] in revision_ids
|
||||
}
|
||||
|
||||
if meta_bugs is not None:
|
||||
blocker_to_meta = collections.defaultdict(set)
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from logging import getLogger
|
||||
from typing import Optional
|
||||
|
||||
import dateutil.parser
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from bugbug import db, phabricator, repository
|
||||
from bugbug.utils import get_secret, zstd_compress
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
class Retriever(object):
    """Build the Phabricator revisions DB from the revisions linked to recent commits."""

    @staticmethod
    def _keep_last(revision_ids: list, limit: Optional[int]) -> list:
        """Return the trailing `limit` entries of `revision_ids`.

        `None` means "no limit" and returns the list unchanged. A non-positive
        limit selects nothing: a plain `revision_ids[-limit:]` would return
        the *whole* list for `limit == 0`, because `-0` is `0`.
        """
        if limit is None:
            return revision_ids
        return revision_ids[-limit:] if limit > 0 else []

    def retrieve_revisions(self, limit: Optional[int] = None) -> None:
        """Download revisions linked to the last year of commits and compress the DB.

        Args:
            limit: when given, only the trailing `limit` revision IDs (in the
                order the commits DB yields them) are downloaded.

        Raises:
            AssertionError: if the commits DB cannot be downloaded.
        """
        phabricator.set_api_key(
            get_secret("PHABRICATOR_URL"), get_secret("PHABRICATOR_TOKEN")
        )

        # The result is deliberately not asserted, unlike the commits DB
        # below. NOTE(review): presumably because a previously published
        # revisions DB may not exist yet -- confirm.
        db.download(phabricator.REVISIONS_DB)

        # Get the commits DB, as we need it to get the revision IDs linked to recent commits.
        assert db.download(repository.COMMITS_DB)

        # Get IDs of revisions linked to commits since a year ago.
        start_date = datetime.now() - relativedelta(years=1)
        recent_revision_ids = (
            repository.get_revision_id(commit)
            for commit in repository.get_commits()
            if dateutil.parser.parse(commit["pushdate"]) >= start_date
        )
        # `filter(None, ...)` drops commits without a linked revision.
        revision_ids = list(filter(None, recent_revision_ids))

        revision_ids = self._keep_last(revision_ids, limit)

        phabricator.download_revisions(revision_ids)

        zstd_compress(phabricator.REVISIONS_DB)
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line and run the revisions retrieval."""
    description = "Retrieve revisions from Phabricator"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--limit",
        type=int,
        # The retriever keeps the trailing N IDs of its list
        # (`revision_ids[-limit:]`), so the help describes exactly that
        # rather than claiming they are the "oldest" ones.
        help="Only download the last N revisions, used mainly for integration tests",
    )

    # Parse args to show the help if `--help` is passed
    args = parser.parse_args()

    retriever = Retriever()
    retriever.retrieve_revisions(args.limit)


if __name__ == "__main__":
    main()
|
|
@ -38,6 +38,9 @@ class TestingPolicyStatsGenerator(object):
|
|||
rev_start="children({})".format(commit["node"]),
|
||||
)
|
||||
|
||||
logger.info("Downloading revisions database...")
|
||||
assert db.download(phabricator.REVISIONS_DB)
|
||||
|
||||
logger.info("Downloading bugs database...")
|
||||
assert db.download(bugzilla.BUGS_DB)
|
||||
|
||||
|
@ -63,10 +66,18 @@ class TestingPolicyStatsGenerator(object):
|
|||
commits = self.get_landed_since(days_start, days_end)
|
||||
|
||||
logger.info("Retrieve Phabricator revisions linked to commits...")
|
||||
revision_ids = list(
|
||||
revision_ids = set(
|
||||
filter(None, (repository.get_revision_id(commit) for commit in commits))
|
||||
)
|
||||
revision_map = phabricator.get(revision_ids)
|
||||
|
||||
logger.info("Download revisions of interest...")
|
||||
phabricator.download_revisions(revision_ids)
|
||||
|
||||
revision_map = {
|
||||
revision["id"]: revision
|
||||
for revision in phabricator.get_revisions()
|
||||
if revision["id"] in revision_ids
|
||||
}
|
||||
|
||||
logger.info("Download bugs of interest...")
|
||||
bugzilla.download_bugs(
|
||||
|
|
1
setup.py
1
setup.py
|
@ -46,6 +46,7 @@ setup(
|
|||
"bugbug-data-commits = scripts.commit_retriever:main",
|
||||
"bugbug-data-bugzilla = scripts.bug_retriever:main",
|
||||
"bugbug-data-test-scheduling-history = scripts.test_scheduling_history_retriever:main",
|
||||
"bugbug-data-revisions = scripts.revisions_retriever:main",
|
||||
"bugbug-train = scripts.trainer:main",
|
||||
"bugbug-train-similarity = scripts.similarity_trainer:main",
|
||||
"bugbug-check = scripts.check:main",
|
||||
|
|
Загрузка…
Ссылка в новой задаче