зеркало из https://github.com/mozilla/bugbug.git
Add an 'uplift' model
This commit is contained in:
Родитель
5aa7590677
Коммит
289ff7bf92
|
@@ -107,6 +107,25 @@ def get_bugbug_labels(kind='bug', augmentation=False):
|
||||||
return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}
|
return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}
|
||||||
|
|
||||||
|
|
||||||
|
def get_uplift_labels():
    """Collect an uplift label for every bug with a decided approval flag.

    Iterates over the bugs yielded by ``bugzilla.get_bugs()`` and inspects the
    flags of each of their attachments. Only flags whose name starts with
    ``approval-mozilla-`` and whose status is ``+`` or ``-`` are considered.

    Returns:
        A dict keyed by integer bug ID. The value is ``True`` for a ``+``
        status and ``False`` for a ``-`` status. If a bug carries several
        matching flags, the one visited last decides the label. Bugs without
        a matching flag do not appear in the dict.
    """
    uplift_by_bug = {}

    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])

        for attachment in bug['attachments']:
            for flag in attachment['flags']:
                # Not an uplift approval flag: nothing to learn from it.
                if not flag['name'].startswith('approval-mozilla-'):
                    continue

                status = flag['status']
                # Statuses other than '+' and '-' are not a decision yet.
                if status in ('+', '-'):
                    uplift_by_bug[bug_id] = status == '+'

    return uplift_by_bug
|
||||||
|
|
||||||
|
|
||||||
def get_all_bug_ids():
|
def get_all_bug_ids():
|
||||||
bug_ids = set()
|
bug_ids = set()
|
||||||
|
|
||||||
|
|
|
@@ -0,0 +1,62 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
import xgboost
|
||||||
|
from sklearn.feature_extraction import DictVectorizer
|
||||||
|
from sklearn.pipeline import FeatureUnion
|
||||||
|
from sklearn.pipeline import Pipeline
|
||||||
|
|
||||||
|
from bugbug import bug_features
|
||||||
|
from bugbug import labels
|
||||||
|
from bugbug.model import Model
|
||||||
|
from bugbug.utils import DictSelector
|
||||||
|
|
||||||
|
|
||||||
|
class UpliftModel(Model):
    """Classifier trained on the labels from ``labels.get_uplift_labels()``.

    The constructor wires up three things:

    * ``self.classes``: the bug ID -> label mapping used as training targets.
    * ``self.extraction_pipeline``: a scikit-learn pipeline that runs the
      bug feature extractors and then unions a ``DictVectorizer`` over the
      ``data`` entry with text vectorizers over ``title`` and ``comments``.
    * ``self.clf``: an ``xgboost.XGBClassifier``.
    """

    def __init__(self, lemmatization=False):
        """Set up labels, the feature extraction pipeline and the classifier.

        Args:
            lemmatization: Forwarded unchanged to ``Model.__init__``.
        """
        Model.__init__(self, lemmatization)

        self.classes = labels.get_uplift_labels()

        # Per-bug feature extractors handed to BugExtractor, in this order.
        extractors = [
            bug_features.has_str(),
            bug_features.has_regression_range(),
            bug_features.severity(),
            bug_features.keywords(),
            bug_features.is_coverity_issue(),
            bug_features.has_crash_signature(),
            bug_features.has_url(),
            bug_features.has_w3c_url(),
            bug_features.has_github_url(),
            bug_features.whiteboard(),
            bug_features.patches(),
            bug_features.landings(),
            bug_features.title(),
            bug_features.comments(),
        ]
        bug_extractor = bug_features.BugExtractor(extractors)

        # Structured features: select the 'data' entry and vectorize the dict.
        data_branch = ('data', Pipeline([
            ('selector', DictSelector(key='data')),
            ('vect', DictVectorizer()),
        ]))

        # Free-text features: 'title' and 'comments' get identical handling.
        # NOTE(review): self.text_vectorizer is presumably provided by
        # Model.__init__ -- confirm against bugbug.model.
        text_branches = [
            (field, Pipeline([
                ('selector', DictSelector(key=field)),
                ('tfidf', self.text_vectorizer(stop_words='english')),
            ]))
            for field in ('title', 'comments')
        ]

        feature_union = FeatureUnion(
            transformer_list=[data_branch] + text_branches,
        )

        self.extraction_pipeline = Pipeline([
            ('bug_extractor', bug_extractor),
            ('union', feature_union),
        ])

        self.clf = xgboost.XGBClassifier(n_jobs=16)
|
5
run.py
5
run.py
|
@@ -13,7 +13,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--lemmatization', help='Perform lemmatization (using spaCy)', action='store_true')
|
parser.add_argument('--lemmatization', help='Perform lemmatization (using spaCy)', action='store_true')
|
||||||
parser.add_argument('--download', help='Download data required for training', action='store_true')
|
parser.add_argument('--download', help='Download data required for training', action='store_true')
|
||||||
parser.add_argument('--train', help='Perform training', action='store_true')
|
parser.add_argument('--train', help='Perform training', action='store_true')
|
||||||
parser.add_argument('--goal', help='Goal of the classifier', choices=['bug', 'regression', 'tracking', 'qaneeded'], default='bug')
|
parser.add_argument('--goal', help='Goal of the classifier', choices=['bug', 'regression', 'tracking', 'qaneeded', 'uplift'], default='bug')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if args.download:
|
if args.download:
|
||||||
|
@@ -34,6 +34,9 @@ if __name__ == '__main__':
|
||||||
elif args.goal == 'qaneeded':
|
elif args.goal == 'qaneeded':
|
||||||
from bugbug.models.qaneeded import QANeededModel
|
from bugbug.models.qaneeded import QANeededModel
|
||||||
model_class = QANeededModel
|
model_class = QANeededModel
|
||||||
|
elif args.goal == 'uplift':
|
||||||
|
from bugbug.models.uplift import UpliftModel
|
||||||
|
model_class = UpliftModel
|
||||||
|
|
||||||
if args.train:
|
if args.train:
|
||||||
model = model_class(args.lemmatization)
|
model = model_class(args.lemmatization)
|
||||||
|
|
Загрузка…
Ссылка в новой задаче