зеркало из https://github.com/mozilla/bugbug.git
In the retrieval task, download only new or changed bugs
To support this, refactor the bugzilla methods: - add methods to get bug IDs for a given query and for a given time period; - rename the internal _download method to get, since it is used externally; - make delete more flexible by accepting a lambda that chooses which bugs to delete. Fixes #440.
This commit is contained in:
Родитель
cb8aacd71a
Коммит
735fccc4a9
|
@ -22,6 +22,30 @@ db.register(
|
|||
1,
|
||||
)
|
||||
|
||||
# Bugzilla products included when querying bug IDs by creation date; the
# tuple is sent as the "product" search parameter. Commented-out entries
# are currently excluded from the query.
PRODUCTS = (
    "Add-on SDK",
    "Android Background Services",
    "Core",
    "Core Graveyard",
    "DevTools",
    "DevTools Graveyard",
    "External Software Affecting Firefox",
    "Firefox",
    "Firefox Graveyard",
    "Firefox Build System",
    "Firefox for Android",
    "Firefox for Android Graveyard",
    # 'Firefox for iOS',
    "Firefox Health Report",
    # 'Focus',
    # 'Hello (Loop)',
    "NSPR",
    "NSS",
    "Toolkit",
    "Toolkit Graveyard",
    "WebExtensions",
)
|
||||
|
||||
ATTACHMENT_INCLUDE_FIELDS = [
|
||||
"id",
|
||||
"is_obsolete",
|
||||
|
@ -59,7 +83,28 @@ def set_token(token):
|
|||
Bugzilla.TOKEN = token
|
||||
|
||||
|
||||
def _download(ids_or_query):
|
||||
def get_ids(params):
    """Return the IDs of all bugs matching a Bugzilla search query.

    Args:
        params: Dict of Bugzilla search parameters. It must either omit
            "include_fields" or set it to "id", because only the ID of each
            bug is requested. The dict is not modified.

    Returns:
        A list containing the "id" of every bug handed to the bug handler.
    """
    assert "include_fields" not in params or params["include_fields"] == "id"

    # Query with a copy so the caller's dict is left untouched.
    query = {**params, "include_fields": "id"}

    all_ids = []

    def bughandler(bug):
        all_ids.append(bug["id"])

    # BUGZILLA_CHUNK_SIZE is a class-wide setting, so remember the previous
    # value and always restore it, even if the query raises.
    old_CHUNK_SIZE = Bugzilla.BUGZILLA_CHUNK_SIZE
    try:
        # NOTE(review): presumably a larger chunk size is affordable here
        # because only IDs are fetched - confirm.
        Bugzilla.BUGZILLA_CHUNK_SIZE = 7000

        Bugzilla(query, bughandler=bughandler).get_data().wait()
    finally:
        Bugzilla.BUGZILLA_CHUNK_SIZE = old_CHUNK_SIZE

    return all_ids
|
||||
|
||||
|
||||
def get(ids_or_query):
|
||||
new_bugs = {}
|
||||
|
||||
def bughandler(bug):
|
||||
|
@ -107,31 +152,7 @@ def _download(ids_or_query):
|
|||
return new_bugs
|
||||
|
||||
|
||||
def download_bugs_between(date_from, date_to, security=False, store=True):
|
||||
products = {
|
||||
"Add-on SDK",
|
||||
"Android Background Services",
|
||||
"Core",
|
||||
"Core Graveyard",
|
||||
"DevTools",
|
||||
"DevTools Graveyard",
|
||||
"External Software Affecting Firefox",
|
||||
"Firefox",
|
||||
"Firefox Graveyard",
|
||||
"Firefox Build System",
|
||||
"Firefox for Android",
|
||||
"Firefox for Android Graveyard",
|
||||
# 'Firefox for iOS',
|
||||
"Firefox Health Report",
|
||||
# 'Focus',
|
||||
# 'Hello (Loop)',
|
||||
"NSPR",
|
||||
"NSS",
|
||||
"Toolkit",
|
||||
"Toolkit Graveyard",
|
||||
"WebExtensions",
|
||||
}
|
||||
|
||||
def get_ids_between(date_from, date_to, security=False):
|
||||
params = {
|
||||
"f1": "creation_ts",
|
||||
"o1": "greaterthan",
|
||||
|
@ -139,47 +160,14 @@ def download_bugs_between(date_from, date_to, security=False, store=True):
|
|||
"f2": "creation_ts",
|
||||
"o2": "lessthan",
|
||||
"v2": date_to.strftime("%Y-%m-%d"),
|
||||
"product": products,
|
||||
"product": PRODUCTS,
|
||||
}
|
||||
|
||||
if not security:
|
||||
params["f3"] = "bug_group"
|
||||
params["o3"] = "isempty"
|
||||
|
||||
params["count_only"] = 1
|
||||
r = requests.get("https://bugzilla.mozilla.org/rest/bug", params=params)
|
||||
r.raise_for_status()
|
||||
count = r.json()["bug_count"]
|
||||
del params["count_only"]
|
||||
|
||||
params["limit"] = 100
|
||||
params["order"] = "bug_id"
|
||||
|
||||
old_bug_ids = set(bug["id"] for bug in get_bugs())
|
||||
|
||||
all_bugs = []
|
||||
|
||||
with tqdm(total=count) as progress_bar:
|
||||
for offset in range(0, count, Bugzilla.BUGZILLA_CHUNK_SIZE):
|
||||
params["offset"] = offset
|
||||
|
||||
new_bugs = _download(params)
|
||||
|
||||
progress_bar.update(Bugzilla.BUGZILLA_CHUNK_SIZE)
|
||||
|
||||
all_bugs += [bug for bug in new_bugs.values()]
|
||||
|
||||
if store:
|
||||
db.append(
|
||||
BUGS_DB,
|
||||
(
|
||||
bug
|
||||
for bug_id, bug in new_bugs.items()
|
||||
if bug_id not in old_bug_ids
|
||||
),
|
||||
)
|
||||
|
||||
return all_bugs
|
||||
return get_ids(params)
|
||||
|
||||
|
||||
def download_bugs(bug_ids, products=None, security=False):
|
||||
|
@ -204,7 +192,7 @@ def download_bugs(bug_ids, products=None, security=False):
|
|||
)
|
||||
with tqdm(total=len(new_bug_ids)) as progress_bar:
|
||||
for chunk in chunks:
|
||||
new_bugs = _download(chunk)
|
||||
new_bugs = get(chunk)
|
||||
|
||||
progress_bar.update(len(chunk))
|
||||
|
||||
|
@ -225,8 +213,8 @@ def download_bugs(bug_ids, products=None, security=False):
|
|||
db.append(BUGS_DB, new_bugs.values())
|
||||
|
||||
|
||||
def delete_bugs(bug_ids):
|
||||
db.delete(BUGS_DB, lambda bug: bug["id"] in set(bug_ids))
|
||||
def delete_bugs(match):
    """Delete bugs from the bugs DB.

    Args:
        match: Callable passed through to db.delete; callers in this change
            pass a predicate that takes a bug dict and returns True for the
            bugs to remove.
    """
    db.delete(BUGS_DB, match)
|
||||
|
||||
|
||||
def count_bugs(bug_query_params):
|
||||
|
|
|
@ -8,7 +8,7 @@ from logging import INFO, basicConfig, getLogger
|
|||
|
||||
from dateutil.relativedelta import relativedelta
|
||||
|
||||
from bugbug import bug_snapshot, bugzilla, labels
|
||||
from bugbug import bug_snapshot, bugzilla, db, labels
|
||||
from bugbug.utils import get_secret
|
||||
|
||||
basicConfig(level=INFO)
|
||||
|
@ -19,29 +19,55 @@ class Retriever(object):
|
|||
def retrieve_bugs(self):
|
||||
bugzilla.set_token(get_secret("BUGZILLA_TOKEN"))
|
||||
|
||||
db.download_version(bugzilla.BUGS_DB)
|
||||
if not db.is_old_version(bugzilla.BUGS_DB):
|
||||
db.download(bugzilla.BUGS_DB)
|
||||
|
||||
# Get IDs of bugs changed since last run.
|
||||
last_modified = db.last_modified(bugzilla.BUGS_DB)
|
||||
logger.info(
|
||||
f"Retrieving IDs of bugs modified since the last run on {last_modified}"
|
||||
)
|
||||
changed_ids = bugzilla.get_ids(
|
||||
{"f1": "delta_ts", "o1": "greaterthaneq", "v1": last_modified.date()}
|
||||
)
|
||||
logger.info(f"Retrieved {len(changed_ids)} IDs.")
|
||||
|
||||
# Get IDs of bugs between (two years and six months ago) and (six months ago).
|
||||
six_months_ago = datetime.utcnow() - relativedelta(months=6)
|
||||
two_years_and_six_months_ago = six_months_ago - relativedelta(years=2)
|
||||
logger.info(
|
||||
"Downloading bugs from {} to {}".format(
|
||||
two_years_and_six_months_ago, six_months_ago
|
||||
)
|
||||
f"Retrieving bug IDs from {two_years_and_six_months_ago} to {six_months_ago}"
|
||||
)
|
||||
bugzilla.download_bugs_between(two_years_and_six_months_ago, six_months_ago)
|
||||
timespan_ids = bugzilla.get_ids_between(
|
||||
two_years_and_six_months_ago, six_months_ago
|
||||
)
|
||||
logger.info(f"Retrieved {len(timespan_ids)} IDs.")
|
||||
|
||||
logger.info("Downloading labelled bugs")
|
||||
bug_ids = labels.get_all_bug_ids()
|
||||
bugzilla.download_bugs(bug_ids)
|
||||
# Get IDs of labelled bugs.
|
||||
labelled_bug_ids = labels.get_all_bug_ids()
|
||||
logger.info(f"{len(labelled_bug_ids)} labelled bugs to download.")
|
||||
|
||||
all_ids = set(timespan_ids + labelled_bug_ids)
|
||||
|
||||
# We have to redownload bugs that were changed since the last download.
|
||||
# We can remove from the DB the bugs that are outside of the considered timespan and are not labelled.
|
||||
bugzilla.delete_bugs(
|
||||
lambda bug: bug["id"] in changed_ids or bug["id"] not in all_ids
|
||||
)
|
||||
|
||||
bugzilla.download_bugs(timespan_ids + labelled_bug_ids)
|
||||
|
||||
# Try to re-download inconsistent bugs, up to three times.
|
||||
for i in range(3):
|
||||
bug_ids = bug_snapshot.get_inconsistencies()
|
||||
bug_ids = set(bug_snapshot.get_inconsistencies())
|
||||
if len(bug_ids) == 0:
|
||||
break
|
||||
|
||||
logger.info(
|
||||
f"Re-downloading {len(bug_ids)} bugs, as they were inconsistent"
|
||||
)
|
||||
bugzilla.delete_bugs(bug_ids)
|
||||
bugzilla.delete_bugs(lambda bug: bug["id"] in bug_ids)
|
||||
bugzilla.download_bugs(bug_ids)
|
||||
|
||||
self.compress_file("data/bugs.json")
|
||||
|
|
|
@ -17,10 +17,13 @@ try:
|
|||
with open("duplicate_test_bugs.json", "r") as f:
|
||||
test_bugs = json.load(f)
|
||||
except FileNotFoundError:
|
||||
test_bugs = bugzilla.download_bugs_between(
|
||||
datetime.now() - timedelta(days=21), datetime.now(), store=False
|
||||
test_bug_ids = bugzilla.get_ids_between(
|
||||
datetime.now() - timedelta(days=21), datetime.now()
|
||||
)
|
||||
test_bugs = [bug for bug in test_bugs if not bug["creator"] in REPORTERS_TO_IGNORE]
|
||||
test_bugs = bugzilla.get(test_bug_ids)
|
||||
test_bugs = [
|
||||
bug for bug in test_bugs.values() if not bug["creator"] in REPORTERS_TO_IGNORE
|
||||
]
|
||||
with open("duplicate_test_bugs.json", "w") as f:
|
||||
json.dump(test_bugs, f)
|
||||
|
||||
|
|
|
@ -22,15 +22,12 @@ def fetch_untriaged(args):
|
|||
|
||||
# Set bugzilla token and download bugs
|
||||
bugzilla.set_token(args.token)
|
||||
bug_ids = bugzilla.download_bugs_between(three_months_ago, today)
|
||||
bug_ids = bugzilla.get_ids_between(three_months_ago, today)
|
||||
bugs = bugzilla.get(bug_ids)
|
||||
|
||||
# Get untriaged bugs
|
||||
bugs = bugzilla.get_bugs()
|
||||
untriaged_bugs = []
|
||||
for bug in bugs:
|
||||
if bug["id"] not in bug_ids:
|
||||
continue
|
||||
|
||||
for bug in bugs.values():
|
||||
for history in bug["history"]:
|
||||
for change in history["changes"]:
|
||||
if (
|
||||
|
|
Загрузка…
Ссылка в новой задаче