зеркало из https://github.com/mozilla/bugbug.git
Extract the logic to identify bug types into individual feature classes (#3907)
This commit is contained in:
Родитель
c598ee7ef2
Коммит
ca1bebafe3
|
@ -16,7 +16,7 @@ from libmozdata import versions
|
|||
from libmozdata.bugzilla import Bugzilla
|
||||
from sklearn.base import BaseEstimator, TransformerMixin
|
||||
|
||||
from bugbug import bug_snapshot, repository
|
||||
from bugbug import bug_snapshot, bugzilla, repository
|
||||
|
||||
|
||||
def field(bug, field):
|
||||
|
@ -687,3 +687,192 @@ class BugExtractor(BaseEstimator, TransformerMixin):
|
|||
bugs_iter = apply_rollback(bugs_iter)
|
||||
|
||||
return pd.DataFrame(apply_transform(bug) for bug in bugs_iter)
|
||||
|
||||
|
||||
class IsPerformanceBug(SingleBugFeature):
    """Flag bugs that carry a performance marker.

    A bug matches when its ``cf_performance_impact`` field is ``low``,
    ``medium`` or ``high``, when one of its keywords starts with an entry of
    ``keyword_prefixes``, or when its lower-cased whiteboard contains an entry
    of ``whiteboard_prefixes``.
    """

    name = "Is Performance Bug"
    type_name = "performance"
    # Keyword prefixes that mark a bug as performance-related.
    keyword_prefixes = ("perf", "topperf", "main-thread-io")
    # Lower-case whiteboard fragments that mark a bug as performance-related.
    whiteboard_prefixes = (
        "[fxperf",
        "[fxperfsize",
        "[snappy",
        "[pdfjs-c-performance",
        "[pdfjs-performance",
        "[sp3",
    )

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> bool:
        """Return True when ``bug`` matches any performance marker.

        ``bug_map`` is accepted for signature parity with the other bug type
        features and is not used here.
        """
        if bug.get("cf_performance_impact") in ("low", "medium", "high"):
            return True

        # str.startswith accepts a tuple, so one call tests every prefix.
        if any(kw.startswith(self.keyword_prefixes) for kw in bug["keywords"]):
            return True

        whiteboard = bug["whiteboard"].lower()
        return any(tag in whiteboard for tag in self.whiteboard_prefixes)
|
||||
|
||||
|
||||
class IsMemoryBug(SingleBugFeature):
    """Flag bugs that carry a memory marker.

    A bug matches when it blocks a bug (found in ``bug_map``) whose alias
    starts with ``memshrink``, when one of its keywords starts with an entry
    of ``keyword_prefixes``, or when its lower-cased whiteboard contains an
    entry of ``whiteboard_prefixes``.
    """

    name = "Is Memory Bug"
    type_name = "memory"
    # Keyword prefixes that mark a bug as memory-related.
    keyword_prefixes = ("memory-",)
    # Lower-case whiteboard fragments that mark a bug as memory-related.
    whiteboard_prefixes = ("[overhead", "[memshrink")

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> bool:
        """Return True when ``bug`` matches any memory marker.

        The blocked-bug alias check only runs when ``bug_map`` is provided;
        blocked IDs missing from the map are ignored.
        """
        if bug_map is not None:
            # Lazily walk the aliases of the blocked bugs that are known.
            blocked_aliases = (
                bug_map[blocked_id]["alias"]
                for blocked_id in bug["blocks"]
                if blocked_id in bug_map
            )
            if any(
                alias and alias.startswith("memshrink") for alias in blocked_aliases
            ):
                return True

        # str.startswith accepts a tuple, so one call tests every prefix.
        if any(kw.startswith(self.keyword_prefixes) for kw in bug["keywords"]):
            return True

        whiteboard = bug["whiteboard"].lower()
        return any(tag in whiteboard for tag in self.whiteboard_prefixes)
|
||||
|
||||
|
||||
class IsPowerBug(SingleBugFeature):
    """Flag bugs that carry a power marker.

    A bug matches when one of its keywords starts with an entry of
    ``keyword_prefixes`` or when its lower-cased whiteboard contains an entry
    of ``whiteboard_prefixes``.
    """

    name = "Is Power Bug"
    type_name = "power"
    # Keyword prefixes that mark a bug as power-related.
    keyword_prefixes = ("power",)
    # Lower-case whiteboard fragments that mark a bug as power-related.
    whiteboard_prefixes = ("[power",)

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> bool:
        """Return True when ``bug`` matches any power marker.

        ``bug_map`` is accepted for signature parity with the other bug type
        features and is not used here.
        """
        # str.startswith accepts a tuple, so one call tests every prefix.
        if any(kw.startswith(self.keyword_prefixes) for kw in bug["keywords"]):
            return True

        whiteboard = bug["whiteboard"].lower()
        return any(tag in whiteboard for tag in self.whiteboard_prefixes)
|
||||
|
||||
|
||||
class IsSecurityBug(SingleBugFeature):
    """Flag bugs that carry a security marker.

    A bug matches when one of its keywords starts with an entry of
    ``keyword_prefixes`` or when its lower-cased whiteboard contains an entry
    of ``whiteboard_prefixes``.
    """

    name = "Is Security Bug"
    type_name = "security"
    # Keyword prefixes that mark a bug as security-related.
    keyword_prefixes = ("sec-", "csectype-")
    # Lower-case whiteboard fragments that mark a bug as security-related.
    whiteboard_prefixes = ("[client-bounty-form", "[sec-survey")

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> bool:
        """Return True when ``bug`` matches any security marker.

        ``bug_map`` is accepted for signature parity with the other bug type
        features and is not used here.
        """
        # str.startswith accepts a tuple, so one call tests every prefix.
        if any(kw.startswith(self.keyword_prefixes) for kw in bug["keywords"]):
            return True

        whiteboard = bug["whiteboard"].lower()
        return any(tag in whiteboard for tag in self.whiteboard_prefixes)
|
||||
|
||||
|
||||
class IsCrashBug(SingleBugFeature):
    """Flag bugs that carry a crash marker.

    A bug matches when its ``cf_crash_signature`` field contains ``[@`` or
    when one of its keywords starts with an entry of ``keyword_prefixes``.
    """

    name = "Is Crash Bug"
    type_name = "crash"
    # Keyword prefixes that mark a bug as crash-related.
    keyword_prefixes = ("crash", "crashreportid")

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> bool:
        """Return True when ``bug`` matches any crash marker.

        ``bug_map`` is accepted for signature parity with the other bug type
        features and is not used here.
        """
        # Checking for `[@` will exclude some bugs that do not have valid
        # signatures: https://mzl.la/46XAqRF
        signature = bug.get("cf_crash_signature")
        if signature and "[@" in signature:
            return True

        # str.startswith accepts a tuple, so one call tests every prefix.
        return any(kw.startswith(self.keyword_prefixes) for kw in bug["keywords"])
|
||||
|
||||
|
||||
class BugTypes(SingleBugFeature):
    """Collect the names of every bug type that applies to a bug."""

    name = "Infer Bug Type"
    # One detector per bug type; results are reported in this order.
    bug_type_extractors: list = [
        IsCrashBug(),
        IsMemoryBug(),
        IsPerformanceBug(),
        IsPowerBug(),
        IsSecurityBug(),
    ]

    def __call__(
        self,
        bug: bugzilla.BugDict,
        bug_map: dict[int, bugzilla.BugDict] | None = None,
    ) -> list[str]:
        """Run every type detector on ``bug`` and report the ones that match.

        Args:
            - bug (bugzilla.BugDict): A dictionary containing bug data.
            - bug_map (Optional[dict[int, bugzilla.BugDict]]): A mapping
                of bug IDs to bug dictionaries, forwarded unchanged to each
                detector. Default is None.

        Returns:
            - list[str]: The ``type_name`` of each detector that returned a
                truthy result, in ``bug_type_extractors`` order (e.g.,
                "crash", "memory", "performance", "power", "security").
        """
        matched_types = []
        for detector in self.bug_type_extractors:
            if detector(bug, bug_map):
                matched_types.append(detector.type_name)
        return matched_types
|
||||
|
|
|
@ -18,79 +18,6 @@ from bugbug.model import BugModel
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Maps a keyword prefix to the bug type it implies. Iteration order is the
# order in which bug_to_types() tests the prefixes.
KEYWORD_DICT = {
    "sec-": "security",
    "csectype-": "security",
    "memory-": "memory",
    "crash": "crash",
    "crashreportid": "crash",
    "perf": "performance",
    "topperf": "performance",
    "main-thread-io": "performance",
    "power": "power",
}
# De-duplicated bug type names, sorted alphabetically.
TYPE_LIST = sorted(set(KEYWORD_DICT.values()))


def bug_to_types(
    bug: bugzilla.BugDict, bug_map: dict[int, bugzilla.BugDict] | None = None
) -> list[str]:
    """Infer the types of ``bug`` from its whiteboard, fields and keywords.

    Args:
        bug: Bug data; ``whiteboard`` and ``keywords`` are always read, and
            ``blocks`` is read when ``bug_map`` is given.
        bug_map: Optional mapping of bug IDs to bug data, used to look up the
            aliases of the bugs that ``bug`` blocks.

    Returns:
        The inferred type names, without duplicates. The list is built from a
        set, so its order is not defined.
    """
    found: set[str] = set()
    whiteboard = bug["whiteboard"].lower()

    # Memory markers are matched anywhere in the whiteboard (no leading "[").
    if "overhead" in whiteboard or "memshrink" in whiteboard:
        found.add("memory")

    if "[power" in whiteboard:
        found.add("power")

    performance_tags = (
        "[fxperf",
        "[fxperfsize",
        "[snappy",
        "[pdfjs-c-performance",
        "[pdfjs-performance",
        "[sp3",
    )
    if any(tag in whiteboard for tag in performance_tags):
        found.add("performance")

    security_tags = ("[client-bounty-form", "[sec-survey")
    if any(tag in whiteboard for tag in security_tags):
        found.add("security")

    # A missing field is treated like the "unset" placeholder value.
    if bug.get("cf_performance_impact", "---") not in ("---", "?"):
        found.add("performance")

    if bug.get("cf_crash_signature", "") not in ("", "---"):
        found.add("crash")

    if bug_map is not None:
        for blocked_id in bug["blocks"]:
            if blocked_id in bug_map:
                alias = bug_map[blocked_id]["alias"]
                if alias and alias.startswith("memshrink"):
                    found.add("memory")

    for prefix, bug_type in KEYWORD_DICT.items():
        if any(kw.startswith(prefix) for kw in bug["keywords"]):
            found.add(bug_type)

    return list(found)
|
||||
|
||||
|
||||
class BugTypeModel(BugModel):
|
||||
def __init__(self, lemmatization=False, historical=False):
|
||||
|
@ -98,12 +25,20 @@ class BugTypeModel(BugModel):
|
|||
|
||||
self.calculate_importance = False
|
||||
|
||||
self.bug_type_extractors = bug_features.BugTypes.bug_type_extractors
|
||||
|
||||
label_keyword_prefixes = {
|
||||
keyword
|
||||
for extractor in self.bug_type_extractors
|
||||
for keyword in extractor.keyword_prefixes
|
||||
}
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.HasSTR(),
|
||||
bug_features.Severity(),
|
||||
# Ignore keywords that would make the ML completely skewed
|
||||
# (we are going to use them as 100% rules in the evaluation phase).
|
||||
bug_features.Keywords(set(KEYWORD_DICT.keys())),
|
||||
bug_features.Keywords(label_keyword_prefixes),
|
||||
bug_features.IsCoverityIssue(),
|
||||
bug_features.HasCrashSignature(),
|
||||
bug_features.HasURL(),
|
||||
|
@ -170,20 +105,23 @@ class BugTypeModel(BugModel):
|
|||
bug_map = {bug["id"]: bug for bug in bugzilla.get_bugs()}
|
||||
|
||||
for bug_data in bug_map.values():
|
||||
target = np.zeros(len(TYPE_LIST))
|
||||
for type_ in bug_to_types(bug_data, bug_map):
|
||||
target[TYPE_LIST.index(type_)] = 1
|
||||
target = np.zeros(len(self.bug_type_extractors))
|
||||
for i, is_type in enumerate(self.bug_type_extractors):
|
||||
if is_type(bug_data, bug_map):
|
||||
target[i] = 1
|
||||
|
||||
classes[int(bug_data["id"])] = target
|
||||
|
||||
for type_ in TYPE_LIST:
|
||||
bug_types = [extractor.type_name for extractor in self.bug_type_extractors]
|
||||
|
||||
for i, bug_type in enumerate(bug_types):
|
||||
logger.info(
|
||||
"%d %s bugs",
|
||||
sum(target[TYPE_LIST.index(type_)] == 1 for target in classes.values()),
|
||||
type_,
|
||||
sum(target[i] for target in classes.values()),
|
||||
bug_type,
|
||||
)
|
||||
|
||||
return classes, TYPE_LIST
|
||||
return classes, bug_types
|
||||
|
||||
def get_feature_names(self):
|
||||
return self.clf.named_steps["union"].get_feature_names_out()
|
||||
|
@ -194,11 +132,14 @@ class BugTypeModel(BugModel):
|
|||
classes: dict[int, np.ndarray],
|
||||
probabilities: bool,
|
||||
):
|
||||
bug_map = {bug["id"]: bug for bug in bugs}
|
||||
|
||||
for i, bug in enumerate(bugs):
|
||||
for type_ in bug_to_types(bug):
|
||||
if probabilities:
|
||||
classes[i][TYPE_LIST.index(type_)] = 1.0
|
||||
else:
|
||||
classes[i][TYPE_LIST.index(type_)] = 1
|
||||
for j, is_type_applicable in enumerate(self.bug_type_extractors):
|
||||
if is_type_applicable(bug, bug_map):
|
||||
if probabilities:
|
||||
classes[i][j] = 1.0
|
||||
else:
|
||||
classes[i][j] = 1
|
||||
|
||||
return classes
|
||||
|
|
|
@ -30,7 +30,6 @@ from dateutil.relativedelta import relativedelta
|
|||
from tqdm import tqdm
|
||||
|
||||
from bugbug import bug_features, bugzilla, db, phabricator, repository, test_scheduling
|
||||
from bugbug.models.bugtype import bug_to_types
|
||||
from bugbug.models.regressor import BUG_FIXING_COMMITS_DB, RegressorModel
|
||||
from bugbug.utils import (
|
||||
download_check_etag,
|
||||
|
@ -555,6 +554,8 @@ class LandingsRiskReportGenerator(object):
|
|||
|
||||
component_team_mapping = get_component_team_mapping()
|
||||
|
||||
bug_to_types = bug_features.BugTypes()
|
||||
|
||||
bug_summaries = []
|
||||
for bug_id in bugs:
|
||||
if bug_id not in bug_map:
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
{"keywords": ["meta", "perf"], "whiteboard": "", "cf_crash_signature": ""}
|
||||
{"keywords": ["memory-leak", "regression"], "whiteboard": "[MemShrink:P1]", "cf_crash_signature": ""}
|
||||
{"whiteboard": "", "keywords": ["power"]}
|
||||
{"keywords": ["sec-want"], "whiteboard": "[sg:want][psm-padlock]"}
|
||||
{"keywords": ["crash", "regression"], "whiteboard": "", "cf_crash_signature": "[@ audiounit_property_listener_callback]"}
|
|
@ -12,6 +12,7 @@ from bugbug.bug_features import (
|
|||
BlockedBugsNumber,
|
||||
BugExtractor,
|
||||
BugReporter,
|
||||
BugTypes,
|
||||
CommentCount,
|
||||
CommentLength,
|
||||
Component,
|
||||
|
@ -178,3 +179,11 @@ def test_BugExtractor():
|
|||
BugExtractor([HasSTR(), HasSTR()], [fileref(), url()])
|
||||
with pytest.raises(AssertionError):
|
||||
BugExtractor([HasSTR(), HasURL()], [fileref(), fileref()])
|
||||
|
||||
|
||||
def test_BugTypes(read) -> None:
    """Hand the bug type fixture file and its expected results to ``read``.

    NOTE(review): ``read`` is presumably a pytest fixture that loads the file,
    applies the feature class to each entry and compares the results; confirm
    against its definition.
    """
    # One expected result per line of bug_types.json, in file order.
    expected_types = [
        ["performance"],
        ["memory"],
        ["power"],
        ["security"],
        ["crash"],
    ]
    read("bug_types.json", BugTypes, expected_types)
|
||||
|
|
Загрузка…
Ссылка в новой задаче