bugbug/comment_level_labeler.py

109 строки
2.9 KiB
Python

# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import csv
import os
import random
from bugbug import bugzilla
parser = argparse.ArgumentParser()
parser.add_argument(
"--goal",
help="Goal of the labeler",
choices=["str", "regressionrange"],
default="str",
)
args = parser.parse_args()
if args.goal == "str":
from bugbug.models.bug import BugModel
model = BugModel.load("bugmodel")
elif args.goal == "regressionrange":
from bugbug.models.regression import RegressionModel
model = RegressionModel.load("regressionmodel")
file_path = os.path.join("bugbug", "labels", f"{args.goal}.csv")
with open(file_path, "r") as f:
reader = csv.reader(f)
next(reader)
labeled_comments = [(int(r[0]), int(r[1]), r[2]) for r in reader]
already_done = set((c[0], c[1]) for c in labeled_comments)
bugs = []
for bug in bugzilla.get_bugs():
# For the str and regressionrange problems, we don't care about test failures,
if (
"intermittent-failure" in bug["keywords"]
or "stockwell" in bug["whiteboard"]
or "permafail" in bug["summary"].lower()
):
continue
# bugs filed from Socorro,
if (
"this bug was filed from the socorro interface"
in bug["comments"][0]["text"].lower()
):
continue
# and fuzzing bugs.
if "fuzzing" in bug["comments"][0]["text"].lower():
continue
bugs.append(bug)
random.shuffle(bugs)
for bug in bugs:
# Only show bugs that are really bugs/regressions for labeling.
c = model.classify(bug)
if c != 1:
continue
v = None
for i, comment in enumerate(bug["comments"]):
if (bug["id"], i) in already_done:
continue
os.system("clear")
print(f'Bug {bug["id"]} - {bug["summary"]}')
print(f"Comment {i}")
print(comment["text"])
if args.goal == "str":
print(
"\nY for comment containing STR, N for comment not containing STR, K to skip, E to exit"
)
elif args.goal == "regressionrange":
print(
"\nY for comment containing regression range, N for comment not containing regression range, K to skip, E to exit"
)
v = input()
if v in ["e", "k"]:
break
if v in ["y", "n"]:
labeled_comments.append((bug["id"], i, v))
if v not in ["e", "k"]:
with open(file_path, "w") as f:
writer = csv.writer(f)
writer.writerow(["bug_id", "comment_num", f"has_{args.goal}"])
writer.writerows(sorted(labeled_comments))
print("\nE to exit, anything else to continue")
v = input()
if v == "e":
break