зеркало из https://github.com/mozilla/bugbug.git
[inline_comments_data_collection] Add CL arguments for patch and diff length thresholds (#4470)
This commit is contained in:
Родитель
b63e883762
Коммит
6bfef0ce42
|
@ -1,3 +1,4 @@
|
||||||
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -118,7 +119,7 @@ def extract_relevant_diff(patch_diff, filename):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def process_comments(patch_threshold, diff_length_threshold):
|
def process_comments(limit, diff_length_limit):
|
||||||
patch_count = 0
|
patch_count = 0
|
||||||
|
|
||||||
for patch_id, comments in review_data.get_all_inline_comments(lambda c: True):
|
for patch_id, comments in review_data.get_all_inline_comments(lambda c: True):
|
||||||
|
@ -159,7 +160,7 @@ def process_comments(patch_threshold, diff_length_threshold):
|
||||||
logger.error(f"Failed to fetch diff: {e}")
|
logger.error(f"Failed to fetch diff: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if len(patch_diff) > diff_length_threshold:
|
if len(patch_diff) > diff_length_limit:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
relevant_diff = extract_relevant_diff(patch_diff, comment.filename)
|
relevant_diff = extract_relevant_diff(patch_diff, comment.filename)
|
||||||
|
@ -178,16 +179,35 @@ def process_comments(patch_threshold, diff_length_threshold):
|
||||||
yield data
|
yield data
|
||||||
|
|
||||||
patch_count += 1
|
patch_count += 1
|
||||||
if patch_count >= patch_threshold:
|
if patch_count >= limit:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
parser = argparse.ArgumentParser(description="Process patch reviews.")
|
||||||
|
parser.add_argument(
|
||||||
|
"--limit",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Limit the number of patches to process. No limit if not specified.",
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--diff-length-limit",
|
||||||
|
type=int,
|
||||||
|
default=1000,
|
||||||
|
help="Limit the maximum allowed diff length. No limit if not specified.",
|
||||||
|
)
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
limit = args.limit or float("inf")
|
||||||
|
diff_length_limit = args.diff_length_limit or float("inf")
|
||||||
|
|
||||||
os.makedirs("patches", exist_ok=True)
|
os.makedirs("patches", exist_ok=True)
|
||||||
os.makedirs("data", exist_ok=True)
|
os.makedirs("data", exist_ok=True)
|
||||||
|
|
||||||
with open(phabricator.FIXED_COMMENTS_DB, "wb") as dataset_file_handle:
|
with open(phabricator.FIXED_COMMENTS_DB, "wb") as dataset_file_handle:
|
||||||
for data in process_comments(patch_threshold=1000, diff_length_threshold=5000):
|
for data in process_comments(limit=limit, diff_length_limit=diff_length_limit):
|
||||||
dataset_file_handle.write(orjson.dumps(data) + b"\n")
|
dataset_file_handle.write(orjson.dumps(data) + b"\n")
|
||||||
|
|
||||||
zstd_compress(phabricator.FIXED_COMMENTS_DB)
|
zstd_compress(phabricator.FIXED_COMMENTS_DB)
|
||||||
|
|
|
@ -22,6 +22,14 @@ ls -lh data
|
||||||
# Remove it to ensure the commit retrieval works as expected
|
# Remove it to ensure the commit retrieval works as expected
|
||||||
rm data/commit*
|
rm data/commit*
|
||||||
|
|
||||||
|
# Then generate a test dataset of fixed inline comments
|
||||||
|
bugbug-fixed-comments --limit 150
|
||||||
|
ls -lh
|
||||||
|
ls -lh data
|
||||||
|
|
||||||
|
# Remove DB to ensure it works as expected
|
||||||
|
rm data/fixed_comments.json
|
||||||
|
|
||||||
# Then retrieve a subset of commit data
|
# Then retrieve a subset of commit data
|
||||||
bugbug-data-commits --limit 500 "${CACHE_DIR:-cache}"
|
bugbug-data-commits --limit 500 "${CACHE_DIR:-cache}"
|
||||||
test -d ${CACHE_DIR:-cache}/mozilla-central
|
test -d ${CACHE_DIR:-cache}/mozilla-central
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -62,6 +62,7 @@ setup(
|
||||||
"bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main",
|
"bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main",
|
||||||
"bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main",
|
"bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main",
|
||||||
"bugbug-data-github = scripts.github_issue_retriever:main",
|
"bugbug-data-github = scripts.github_issue_retriever:main",
|
||||||
|
"bugbug-fixed-comments = scripts.inline_comments_data_collection:main",
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
|
|
Загрузка…
Ссылка в новой задаче