зеркало из https://github.com/mozilla/bugbug.git
Add a RegressionRange model (#449)
This commit is contained in:
Родитель
33f6ae83ce
Коммит
24c805e64e
|
@@ -21,6 +21,7 @@ MODELS = {
|
|||
"stepstoreproduce": "bugbug.models.stepstoreproduce.StepsToReproduceModel",
|
||||
"tracking": "bugbug.models.tracking.TrackingModel",
|
||||
"uplift": "bugbug.models.uplift.UpliftModel",
|
||||
"regressionrange": "bugbug.models.regressionrange.RegressionRangeModel",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@@ -0,0 +1,91 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import xgboost
|
||||
from imblearn.under_sampling import RandomUnderSampler
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features, bugzilla, feature_cleanup
|
||||
from bugbug.model import BugModel
|
||||
|
||||
|
||||
class RegressionRangeModel(BugModel):
    """Bug model with two classes: 1 (has a regression range) and 0 (has none)."""

    def __init__(self, lemmatization=False):
        """Set up the sampler, the extraction pipeline and the classifier.

        Args:
            lemmatization: Forwarded unchanged to ``BugModel.__init__``.
        """
        BugModel.__init__(self, lemmatization)

        # Under-sampler created with a fixed random_state.
        self.sampler = RandomUnderSampler(random_state=0)

        # NOTE(review): the set handed to keywords() names the two keywords
        # that relate to the labels built in get_labels(); presumably they are
        # excluded from the features -- confirm against bug_features.keywords.
        extractors = [
            bug_features.has_str(),
            bug_features.severity(),
            bug_features.keywords({"regression", "regressionwindow-wanted"}),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
            bug_features.title(),
        ]

        cleaners = [
            feature_cleanup.fileref(),
            feature_cleanup.url(),
            feature_cleanup.synonyms(),
        ]

        # First stage: extract per-bug data using the extractors and cleaners.
        extractor_step = bug_features.BugExtractor(extractors, cleaners)

        # Second stage: one vectorizer per column ("data", "title", "comments").
        union_step = ColumnTransformer(
            [
                ("data", DictVectorizer(), "data"),
                ("title", self.text_vectorizer(), "title"),
                ("comments", self.text_vectorizer(), "comments"),
            ]
        )

        self.extraction_pipeline = Pipeline(
            [("bug_extractor", extractor_step), ("union", union_step)]
        )

        self.clf = xgboost.XGBClassifier(n_jobs=16)
        self.clf.set_params(predictor="cpu_predictor")

    def get_labels(self):
        """Build the training labels from the bugs yielded by ``bugzilla.get_bugs()``.

        A bug carrying the "regressionwindow-wanted" keyword is labelled 0.
        Otherwise, when the bug has a "cf_has_regression_range" field, "yes"
        maps to 1 and "no" maps to 0. Every other bug is left unlabelled.

        Returns:
            A tuple ``(classes, [0, 1])`` where ``classes`` maps the integer
            bug id to its label.
        """
        classes = {}

        for bug in bugzilla.get_bugs():
            bug_id = int(bug["id"])

            # The keyword takes precedence over the custom field.
            if "regressionwindow-wanted" in bug["keywords"]:
                classes[bug_id] = 0
                continue

            if "cf_has_regression_range" not in bug:
                continue

            has_range = bug["cf_has_regression_range"]
            if has_range == "yes":
                classes[bug_id] = 1
            elif has_range == "no":
                classes[bug_id] = 0

        # Report the size of each class.
        labels = list(classes.values())
        print("{} bugs have regression range".format(labels.count(1)))
        print("{} bugs don't have a regression range".format(labels.count(0)))

        return classes, [0, 1]

    def get_feature_names(self):
        """Return the feature names reported by the pipeline's "union" step."""
        union_step = self.extraction_pipeline.named_steps["union"]
        return union_step.get_feature_names()
|
1
run.py
1
run.py
|
@@ -38,6 +38,7 @@ if __name__ == "__main__":
|
|||
"assignee",
|
||||
"bugtype",
|
||||
"stepstoreproduce",
|
||||
"regressionrange",
|
||||
# commit classifiers
|
||||
"backout",
|
||||
],
|
||||
|
|
Загрузка…
Ссылка в новой задаче