зеркало из https://github.com/mozilla/bugbug.git
[New Model] Performance Bug Model (#3895)
This commit is contained in:
Родитель
799994add8
Коммит
e31823744e
|
@ -22,6 +22,7 @@ MODELS = {
|
|||
"fixtime": "bugbug.models.fixtime.FixTimeModel",
|
||||
"invalidcompatibilityreport": "bugbug.models.invalid_compatibility_report.InvalidCompatibilityReportModel",
|
||||
"needsdiagnosis": "bugbug.models.needsdiagnosis.NeedsDiagnosisModel",
|
||||
"performancebug": "bugbug.models.performancebug.PerformanceBugModel",
|
||||
"qaneeded": "bugbug.models.qaneeded.QANeededModel",
|
||||
"rcatype": "bugbug.models.rcatype.RCATypeModel",
|
||||
"regression": "bugbug.models.regression.RegressionModel",
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import logging
|
||||
|
||||
import xgboost
|
||||
from imblearn.over_sampling import BorderlineSMOTE
|
||||
from imblearn.pipeline import Pipeline as ImblearnPipeline
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features, bugzilla, feature_cleanup, utils
|
||||
from bugbug.model import BugModel
|
||||
|
||||
# Apply the basic logging configuration at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PerformanceBugModel(BugModel):
    """Binary bug classifier: performance bug (class 1) vs. other bug (class 0).

    Structured bug features, the bug title and the first comment are
    vectorized, the training set is rebalanced with BorderlineSMOTE, and an
    XGBoost classifier is fitted on the result.
    """

    def __init__(self, lemmatization=False):
        """Build the feature-extraction pipeline and the classifier pipeline.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        super().__init__(lemmatization)

        self.calculate_importance = False

        feature_extractors = [
            bug_features.HasSTR(),
            # Keywords carrying the IsPerformanceBug prefixes are ignored
            # (presumably so the label is not leaked through the features;
            # confirm against bug_features.IsPerformanceBug).
            bug_features.Keywords(
                prefixes_to_ignore=bug_features.IsPerformanceBug.keyword_prefixes
            ),
            bug_features.IsCoverityIssue(),
            bug_features.HasCrashSignature(),
            bug_features.HasURL(),
            bug_features.HasW3CURL(),
            bug_features.HasGithubURL(),
            bug_features.Product(),
            bug_features.HasRegressionRange(),
            bug_features.HasCVEInAlias(),
            bug_features.HasAttachment(),
            bug_features.FiledVia(),
        ]

        cleanup_functions = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
            feature_cleanup.hex(),
            feature_cleanup.dll(),
            feature_cleanup.crash(),
        ]

        self.extraction_pipeline = Pipeline(
            [
                (
                    "bug_extractor",
                    bug_features.BugExtractor(
                        feature_extractors, cleanup_functions, rollback=True
                    ),
                ),
            ]
        )

        self.clf = ImblearnPipeline(
            [
                (
                    "union",
                    ColumnTransformer(
                        [
                            ("data", DictVectorizer(), "data"),
                            ("title", self.text_vectorizer(min_df=0.0001), "title"),
                            (
                                "first_comment",
                                self.text_vectorizer(min_df=0.0001),
                                "first_comment",
                            ),
                        ]
                    ),
                ),
                # Fixed seed so the oversampling step is reproducible.
                ("sampler", BorderlineSMOTE(random_state=0)),
                (
                    "estimator",
                    xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count()),
                ),
            ]
        )

    def get_labels(self):
        """Compute training labels from the bugs yielded by ``bugzilla.get_bugs()``.

        Bugs that lack the ``cf_performance_impact`` field, or whose value is
        ``"?"`` or ``"none"``, are left out of the labelled set. Every other
        bug is labelled 1 when ``bug_features.IsPerformanceBug`` matches it
        and 0 otherwise.

        Returns:
            A tuple ``(classes, [0, 1])`` where ``classes`` maps integer bug
            ids to their label and ``[0, 1]`` is the list of class names.
        """
        classes = {}
        is_performance_bug = bug_features.IsPerformanceBug()

        for bug_data in bugzilla.get_bugs():
            bug_id = int(bug_data["id"])

            if "cf_performance_impact" not in bug_data or bug_data[
                "cf_performance_impact"
            ] in ("?", "none"):
                continue

            classes[bug_id] = 1 if is_performance_bug(bug_data) else 0

        logger.info(
            "%d performance bugs",
            sum(label == 1 for label in classes.values()),
        )
        logger.info(
            "%d non-performance bugs",
            sum(label == 0 for label in classes.values()),
        )

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the output feature names of the fitted ``"union"`` step."""
        return self.clf.named_steps["union"].get_feature_names_out()

    def overwrite_classes(self, bugs, classes, probabilities):
        """Force the positive class for bugs already known to be performance bugs.

        Args:
            bugs: Sequence of bug dicts, aligned index-by-index with ``classes``.
            classes: Predictions to patch in place; either class labels or,
                when ``probabilities`` is true, per-class probability vectors.
            probabilities: Whether ``classes`` holds probability vectors.

        Returns:
            The same ``classes`` object, with matching entries overwritten.
        """
        is_performance_bug = bug_features.IsPerformanceBug()

        for i, bug in enumerate(bugs):
            if is_performance_bug(bug):
                # Probability vectors are taken to follow the class order
                # [0, 1] declared by get_labels, so certainty for class 1
                # (performance bug) is [0.0, 1.0]. The previous [1.0, 0.0]
                # put all the mass on class 0, contradicting the label branch.
                classes[i] = [0.0, 1.0] if probabilities else 1

        return classes
|
Различия файлов скрыты, потому что одна или несколько строк слишком длинны
|
@ -0,0 +1,15 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from bugbug.models.performancebug import PerformanceBugModel
|
||||
|
||||
|
||||
def test_get_performancebug_labels():
    """Check the labels PerformanceBugModel.get_labels assigns to four known bugs."""
    labels, _ = PerformanceBugModel().get_labels()

    # Bug id -> expected label (1 = performance bug, 0 = non-performance bug).
    expected_labels = (
        (1461247, 1),
        (1457988, 1),
        (446261, 0),
        (452258, 0),
    )
    for bug_id, expected in expected_labels:
        assert labels[bug_id] == expected
|
Загрузка…
Ссылка в новой задаче