зеркало из https://github.com/mozilla/bugbug.git
[inline_comments_data_collection] Add CL arguments for patch and diff length thresholds (#4470)
This commit is contained in:
Родитель
b63e883762
Коммит
6bfef0ce42
|
@ -1,3 +1,4 @@
|
|||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
@ -118,7 +119,7 @@ def extract_relevant_diff(patch_diff, filename):
|
|||
return None
|
||||
|
||||
|
||||
def process_comments(patch_threshold, diff_length_threshold):
|
||||
def process_comments(limit, diff_length_limit):
|
||||
patch_count = 0
|
||||
|
||||
for patch_id, comments in review_data.get_all_inline_comments(lambda c: True):
|
||||
|
@ -159,7 +160,7 @@ def process_comments(patch_threshold, diff_length_threshold):
|
|||
logger.error(f"Failed to fetch diff: {e}")
|
||||
continue
|
||||
|
||||
if len(patch_diff) > diff_length_threshold:
|
||||
if len(patch_diff) > diff_length_limit:
|
||||
continue
|
||||
|
||||
relevant_diff = extract_relevant_diff(patch_diff, comment.filename)
|
||||
|
@ -178,16 +179,35 @@ def process_comments(patch_threshold, diff_length_threshold):
|
|||
yield data
|
||||
|
||||
patch_count += 1
|
||||
if patch_count >= patch_threshold:
|
||||
if patch_count >= limit:
|
||||
break
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Process patch reviews.")
|
||||
parser.add_argument(
|
||||
"--limit",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Limit the number of patches to process. No limit if not specified.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--diff-length-limit",
|
||||
type=int,
|
||||
default=1000,
|
||||
help="Limit the maximum allowed diff length. No limit if not specified.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
limit = args.limit or float("inf")
|
||||
diff_length_limit = args.diff_length_limit or float("inf")
|
||||
|
||||
os.makedirs("patches", exist_ok=True)
|
||||
os.makedirs("data", exist_ok=True)
|
||||
|
||||
with open(phabricator.FIXED_COMMENTS_DB, "wb") as dataset_file_handle:
|
||||
for data in process_comments(patch_threshold=1000, diff_length_threshold=5000):
|
||||
for data in process_comments(limit=limit, diff_length_limit=diff_length_limit):
|
||||
dataset_file_handle.write(orjson.dumps(data) + b"\n")
|
||||
|
||||
zstd_compress(phabricator.FIXED_COMMENTS_DB)
|
||||
|
|
|
@ -22,6 +22,14 @@ ls -lh data
|
|||
# Remove it to ensure the commit retrieval works as expected
|
||||
rm data/commit*
|
||||
|
||||
# Then generate a test dataset of fixed inline comments
|
||||
bugbug-fixed-comments --limit 150
|
||||
ls -lh
|
||||
ls -lh data
|
||||
|
||||
# Remove DB to ensure it works as expected
|
||||
rm data/fixed_comments.json
|
||||
|
||||
# Then retrieve a subset of commit data
|
||||
bugbug-data-commits --limit 500 "${CACHE_DIR:-cache}"
|
||||
test -d ${CACHE_DIR:-cache}/mozilla-central
|
||||
|
|
1
setup.py
1
setup.py
|
@ -62,6 +62,7 @@ setup(
|
|||
"bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main",
|
||||
"bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main",
|
||||
"bugbug-data-github = scripts.github_issue_retriever:main",
|
||||
"bugbug-fixed-comments = scripts.inline_comments_data_collection:main",
|
||||
]
|
||||
},
|
||||
classifiers=[
|
||||
|
|
Загрузка…
Ссылка в новой задаче