зеркало из https://github.com/mozilla/bugbug.git
Move label gathering code in the models themselves
This commit is contained in:
Родитель
569a4ab8f1
Коммит
a6f442b7b2
118
bugbug/labels.py
118
bugbug/labels.py
|
@ -7,128 +7,16 @@ import csv
|
|||
import os
|
||||
import sys
|
||||
|
||||
from bugbug import bugzilla
|
||||
|
||||
|
||||
def get_labels_dir():
    """Return the path of the 'labels' directory located beside the package file.

    The package is looked up in ``sys.modules`` under this module's
    ``__package__`` name; 'labels' is joined onto the directory of that
    module's ``__file__``.
    """
    package_file = sys.modules[__package__].__file__
    package_dir = os.path.dirname(package_file)
    return os.path.join(package_dir, 'labels')
|
||||
|
||||
|
||||
def get_tracking_labels():
    """Build a ``{bug_id: bool}`` mapping of tracking labels.

    Every bug yielded by ``bugzilla.get_bugs()`` is inspected.  A history
    change on a field whose name starts with 'cf_tracking_firefox' labels the
    bug True when the added value is 'blocking' or '+', and False when it is
    '-'; a later matching change overwrites an earlier one.  A bug that ends
    up without a label is recorded as False.
    """
    labels_by_bug = {}

    for bug in bugzilla.get_bugs():
        key = int(bug['id'])

        # Lazily flatten the history into the tracking-flag changes only.
        tracking_changes = (
            change
            for event in bug['history']
            for change in event['changes']
            if change['field_name'].startswith('cf_tracking_firefox')
        )
        for change in tracking_changes:
            added = change['added']
            if added == '-':
                labels_by_bug[key] = False
            elif added in ('blocking', '+'):
                labels_by_bug[key] = True

        # No tracking decision found in the history: default to "not tracked".
        labels_by_bug.setdefault(key, False)

    return labels_by_bug
|
||||
|
||||
|
||||
def get_qa_needed_labels():
    """Build a ``{bug_id: bool}`` mapping telling whether QA was requested.

    For each bug from ``bugzilla.get_bugs()``, every change of every history
    entry is examined: a bug is labelled True when the change's added value
    starts with 'qawanted', or otherwise when the history entry has a 'flags'
    key containing a flag whose name starts with 'qe-verify'.  Bugs without
    any such evidence are labelled False.
    """
    labels_by_bug = {}

    for bug in bugzilla.get_bugs():
        key = int(bug['id'])

        for event in bug['history']:
            for change in event['changes']:
                if change['added'].startswith('qawanted'):
                    labels_by_bug[key] = True
                    continue

                # Flags are only consulted when the change itself is not a
                # 'qawanted' addition; an entry without 'flags' yields nothing.
                for flag in event.get('flags', ()):
                    if flag['name'].startswith('qe-verify'):
                        labels_by_bug[key] = True

        labels_by_bug.setdefault(key, False)

    return labels_by_bug
|
||||
|
||||
|
||||
def get_bugbug_labels(kind='bug', augmentation=False):
|
||||
assert kind in ['bug', 'regression']
|
||||
|
||||
classes = {}
|
||||
|
||||
with open(os.path.join(get_labels_dir(), 'bug_nobug.csv'), 'r') as f:
|
||||
def get_labels(file_name):
|
||||
with open(os.path.join(get_labels_dir(), '{}.csv'.format(file_name)), 'r') as f:
|
||||
reader = csv.reader(f)
|
||||
next(reader)
|
||||
for bug_id, category in reader:
|
||||
assert category in ['True', 'False'], 'unexpected category {}'.format(category)
|
||||
if kind == 'bug':
|
||||
classes[int(bug_id)] = True if category == 'True' else False
|
||||
elif kind == 'regression':
|
||||
if category == 'False':
|
||||
classes[int(bug_id)] = False
|
||||
|
||||
with open(os.path.join(get_labels_dir(), 'regression_bug_nobug.csv'), 'r') as f:
|
||||
reader = csv.reader(f)
|
||||
next(reader)
|
||||
for bug_id, category in reader:
|
||||
assert category in ['nobug', 'bug_unknown_regression', 'bug_no_regression', 'regression'], 'unexpected category {}'.format(category)
|
||||
if kind == 'bug':
|
||||
classes[int(bug_id)] = True if category != 'nobug' else False
|
||||
elif kind == 'regression':
|
||||
if category == 'bug_unknown_regression':
|
||||
continue
|
||||
|
||||
classes[int(bug_id)] = True if category == 'regression' else False
|
||||
|
||||
bug_ids = set()
|
||||
for bug in bugzilla.get_bugs():
|
||||
bug_id = int(bug['id'])
|
||||
|
||||
bug_ids.add(bug_id)
|
||||
|
||||
if bug_id in classes:
|
||||
continue
|
||||
|
||||
# If augmentation is enabled, use bugs marked as 'regression' or 'feature',
|
||||
# as they are basically labelled.
|
||||
if not augmentation:
|
||||
continue
|
||||
|
||||
if any(keyword in bug['keywords'] for keyword in ['regression', 'talos-regression']) or ('cf_has_regression_range' in bug and bug['cf_has_regression_range'] == 'yes'):
|
||||
classes[bug_id] = True
|
||||
elif any(keyword in bug['keywords'] for keyword in ['feature']):
|
||||
classes[bug_id] = False
|
||||
elif kind == 'regression':
|
||||
for history in bug['history']:
|
||||
for change in history['changes']:
|
||||
if change['field_name'] == 'keywords' and change['removed'] == 'regression':
|
||||
classes[bug_id] = False
|
||||
|
||||
# Remove labels which belong to bugs for which we have no data.
|
||||
return {bug_id: label for bug_id, label in classes.items() if bug_id in bug_ids}
|
||||
|
||||
|
||||
def get_uplift_labels():
    """Build a ``{bug_id: bool}`` mapping of uplift decisions.

    Only attachment flags whose name starts with 'approval-mozilla-' and
    whose status is '+' or '-' are considered: '+' labels the bug True and
    '-' labels it False, the last such flag seen winning.  Bugs without a
    decided approval flag do not appear in the result.
    """
    decided = ('+', '-')
    result = {}

    for bug in bugzilla.get_bugs():
        key = int(bug['id'])

        for attachment in bug['attachments']:
            for flag in attachment['flags']:
                is_approval = flag['name'].startswith('approval-mozilla-')
                if is_approval and flag['status'] in decided:
                    result[key] = flag['status'] == '+'

    return result
|
||||
yield from reader
|
||||
|
||||
|
||||
def get_all_bug_ids():
|
||||
|
|
|
@ -0,0 +1,318 @@
|
|||
bug_id,comment_num,has_str
|
||||
641030,0,y
|
||||
1041822,0,n
|
||||
1041822,1,n
|
||||
1041822,2,n
|
||||
1041822,3,n
|
||||
1041822,4,n
|
||||
1041822,5,n
|
||||
1041822,6,n
|
||||
1041830,0,n
|
||||
1041830,1,n
|
||||
1041830,2,n
|
||||
1041830,3,n
|
||||
1041830,4,n
|
||||
1041830,5,n
|
||||
1041830,6,n
|
||||
1041830,7,n
|
||||
1041830,8,n
|
||||
1041836,0,n
|
||||
1041836,1,n
|
||||
1041836,2,n
|
||||
1041836,3,n
|
||||
1041836,4,n
|
||||
1041836,5,n
|
||||
1041836,6,n
|
||||
1041836,7,n
|
||||
1041836,8,n
|
||||
1044586,0,y
|
||||
1046143,0,y
|
||||
1046255,0,y
|
||||
1049215,0,n
|
||||
1051830,0,y
|
||||
1052947,0,y
|
||||
1055310,14,n
|
||||
1055973,0,y
|
||||
1058713,0,y
|
||||
1060929,0,y
|
||||
1067153,0,y
|
||||
1068439,0,n
|
||||
1069724,0,y
|
||||
1070722,0,y
|
||||
1070988,0,y
|
||||
1071686,0,y
|
||||
1072193,0,y
|
||||
1072954,0,y
|
||||
1072991,0,y
|
||||
1073991,0,y
|
||||
1077369,0,y
|
||||
1078743,0,y
|
||||
1080971,0,y
|
||||
1081514,0,y
|
||||
1081677,0,y
|
||||
1082249,0,y
|
||||
1083996,0,y
|
||||
1091109,0,y
|
||||
1092626,0,y
|
||||
1100966,42,n
|
||||
1103635,0,y
|
||||
1105066,0,y
|
||||
1107702,0,n
|
||||
1109155,0,y
|
||||
1111337,0,y
|
||||
1113238,0,y
|
||||
1113834,0,y
|
||||
1114774,1,n
|
||||
1117984,0,y
|
||||
1120065,0,y
|
||||
1121706,0,y
|
||||
1124221,0,y
|
||||
1131463,0,n
|
||||
1131685,0,y
|
||||
1132918,0,y
|
||||
1133356,0,y
|
||||
1134531,0,y
|
||||
1137234,0,y
|
||||
1137906,0,y
|
||||
1140616,0,y
|
||||
1143644,0,y
|
||||
1149608,0,n
|
||||
1152469,0,y
|
||||
1156636,0,y
|
||||
1159259,0,y
|
||||
1162372,0,y
|
||||
1166133,0,y
|
||||
1167105,0,y
|
||||
1168081,0,y
|
||||
1173548,0,y
|
||||
1173792,0,y
|
||||
1175941,0,y
|
||||
1176018,0,y
|
||||
1176551,0,y
|
||||
1177619,0,y
|
||||
1185886,0,y
|
||||
1185927,0,y
|
||||
1187232,0,y
|
||||
1187404,0,y
|
||||
1189715,0,y
|
||||
1190112,0,y
|
||||
1191539,0,y
|
||||
1192539,0,y
|
||||
1192720,0,y
|
||||
1193695,0,y
|
||||
1197569,0,y
|
||||
1198723,0,y
|
||||
1199296,0,y
|
||||
1200602,0,y
|
||||
1204483,0,y
|
||||
1204809,0,y
|
||||
1205476,0,n
|
||||
1205833,0,y
|
||||
1206545,0,y
|
||||
1207536,0,y
|
||||
1207546,0,y
|
||||
1207821,0,y
|
||||
1213688,19,n
|
||||
1214261,0,y
|
||||
1215948,0,y
|
||||
1216366,0,y
|
||||
1221030,0,y
|
||||
1224936,0,y
|
||||
1225125,0,y
|
||||
1225882,0,y
|
||||
1229742,0,y
|
||||
1231758,0,y
|
||||
1232087,0,n
|
||||
1232328,0,y
|
||||
1232346,0,y
|
||||
1233429,0,y
|
||||
1233625,0,y
|
||||
1238427,0,y
|
||||
1240561,0,y
|
||||
1243034,0,y
|
||||
1243413,0,y
|
||||
1243657,0,y
|
||||
1243657,14,n
|
||||
1244597,25,n
|
||||
1246606,0,y
|
||||
1248948,0,y
|
||||
1252974,0,y
|
||||
1253399,0,y
|
||||
1257063,0,n
|
||||
1260022,0,y
|
||||
1261228,0,y
|
||||
1262125,0,y
|
||||
1266372,0,y
|
||||
1268069,0,y
|
||||
1268141,0,y
|
||||
1271173,0,y
|
||||
1273024,0,y
|
||||
1273882,0,y
|
||||
1274362,0,y
|
||||
1275880,0,y
|
||||
1276717,0,n
|
||||
1277295,0,y
|
||||
1278221,0,y
|
||||
1278581,0,y
|
||||
1279036,0,y
|
||||
1279744,0,n
|
||||
1279928,0,y
|
||||
1283721,0,y
|
||||
1286459,0,n
|
||||
1287066,0,y
|
||||
1291270,8,n
|
||||
1292337,0,y
|
||||
1292904,0,y
|
||||
1294602,0,y
|
||||
1295193,0,y
|
||||
1295354,0,y
|
||||
1297549,0,y
|
||||
1298205,0,n
|
||||
1299324,0,y
|
||||
1300805,0,y
|
||||
1302414,0,y
|
||||
1303727,0,y
|
||||
1309219,0,y
|
||||
1309413,0,y
|
||||
1313272,0,n
|
||||
1314128,0,y
|
||||
1314491,0,y
|
||||
1315608,0,y
|
||||
1319911,0,y
|
||||
1320502,0,y
|
||||
1320565,0,y
|
||||
1321069,0,y
|
||||
1322274,17,n
|
||||
1322441,0,y
|
||||
1325955,0,y
|
||||
1326163,0,y
|
||||
1328023,0,y
|
||||
1329386,0,y
|
||||
1329631,0,y
|
||||
1330609,0,y
|
||||
1330836,0,y
|
||||
1334677,0,n
|
||||
1338005,0,y
|
||||
1339497,0,y
|
||||
1340127,0,y
|
||||
1341521,0,y
|
||||
1348148,0,n
|
||||
1348701,0,y
|
||||
1349552,19,n
|
||||
1351102,0,y
|
||||
1352004,0,y
|
||||
1352108,0,y
|
||||
1353039,0,y
|
||||
1353041,0,y
|
||||
1356883,0,y
|
||||
1356921,0,y
|
||||
1357098,0,y
|
||||
1358964,0,y
|
||||
1362764,0,n
|
||||
1362984,8,n
|
||||
1363406,0,y
|
||||
1364727,0,y
|
||||
1365133,213,n
|
||||
1366824,0,y
|
||||
1367688,0,y
|
||||
1368464,0,y
|
||||
1368852,0,y
|
||||
1369246,0,y
|
||||
1372043,0,y
|
||||
1372448,0,y
|
||||
1373528,0,y
|
||||
1373823,0,y
|
||||
1373937,0,n
|
||||
1374584,0,y
|
||||
1374653,0,y
|
||||
1376406,0,y
|
||||
1377597,0,y
|
||||
1379624,0,n
|
||||
1380323,0,y
|
||||
1381682,0,y
|
||||
1382719,22,n
|
||||
1383363,0,y
|
||||
1385440,0,y
|
||||
1386483,0,y
|
||||
1387476,0,y
|
||||
1388043,0,y
|
||||
1388394,0,y
|
||||
1389377,0,y
|
||||
1390863,0,y
|
||||
1392659,0,y
|
||||
1397241,0,y
|
||||
1397737,0,y
|
||||
1399388,0,y
|
||||
1399400,0,y
|
||||
1399559,0,y
|
||||
1399651,0,y
|
||||
1400165,0,y
|
||||
1400556,0,y
|
||||
1400604,0,y
|
||||
1401224,0,y
|
||||
1401943,0,y
|
||||
1402244,0,y
|
||||
1403166,0,y
|
||||
1403750,0,y
|
||||
1404497,0,y
|
||||
1405696,0,y
|
||||
1406414,0,y
|
||||
1406509,0,y
|
||||
1407748,0,y
|
||||
1407983,6,n
|
||||
1408613,0,y
|
||||
1408834,0,y
|
||||
1409634,0,n
|
||||
1410028,0,y
|
||||
1410225,0,y
|
||||
1412213,0,y
|
||||
1417014,0,y
|
||||
1418814,0,y
|
||||
1419173,0,y
|
||||
1421240,0,y
|
||||
1421905,0,y
|
||||
1422215,0,y
|
||||
1422478,0,y
|
||||
1423810,0,y
|
||||
1426081,0,y
|
||||
1428174,0,y
|
||||
1435451,0,y
|
||||
1436311,0,y
|
||||
1437310,1,y
|
||||
1437832,0,y
|
||||
1439285,0,y
|
||||
1439857,0,y
|
||||
1443224,0,y
|
||||
1443583,10,n
|
||||
1445207,0,y
|
||||
1446445,0,y
|
||||
1447052,0,y
|
||||
1449887,0,y
|
||||
1450248,0,y
|
||||
1451683,0,y
|
||||
1451688,0,y
|
||||
1452392,0,y
|
||||
1452673,0,y
|
||||
1452819,0,y
|
||||
1453718,0,y
|
||||
1454572,0,y
|
||||
1457039,0,y
|
||||
1458615,0,y
|
||||
1458866,0,y
|
||||
1461635,0,y
|
||||
1461881,0,y
|
||||
1462906,0,y
|
||||
1463694,1,n
|
||||
1464789,0,y
|
||||
1465616,117,n
|
||||
1471935,0,y
|
||||
1474284,0,y
|
||||
1478754,0,y
|
||||
1480934,0,y
|
||||
1483148,0,y
|
||||
1483593,0,n
|
||||
1483865,6,n
|
||||
1485178,0,y
|
||||
1487263,0,y
|
||||
1488105,0,y
|
||||
1498940,0,y
|
|
|
@ -27,16 +27,18 @@ class Model():
|
|||
return []
|
||||
|
||||
def train(self):
|
||||
classes = self.get_labels()
|
||||
|
||||
# Get bugs.
|
||||
def bugs_all():
|
||||
return bugzilla.get_bugs()
|
||||
|
||||
# Filter out bugs for which we have no labels.
|
||||
def bugs():
|
||||
return (bug for bug in bugs_all() if bug['id'] in self.classes)
|
||||
return (bug for bug in bugs_all() if bug['id'] in classes)
|
||||
|
||||
# Calculate labels.
|
||||
y = np.array([1 if self.classes[bug['id']] else 0 for bug in bugs()])
|
||||
y = np.array([1 if classes[bug['id']] else 0 for bug in bugs()])
|
||||
|
||||
# Extract features from the bugs.
|
||||
X = self.extraction_pipeline.fit_transform(bugs())
|
||||
|
|
|
@ -9,6 +9,7 @@ from sklearn.pipeline import FeatureUnion
|
|||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features
|
||||
from bugbug import bugzilla
|
||||
from bugbug import labels
|
||||
from bugbug.model import Model
|
||||
from bugbug.utils import DictSelector
|
||||
|
@ -18,8 +19,6 @@ class BugModel(Model):
|
|||
def __init__(self, lemmatization=False):
|
||||
Model.__init__(self, lemmatization)
|
||||
|
||||
self.classes = labels.get_bugbug_labels(kind='bug', augmentation=True)
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.has_str(),
|
||||
bug_features.severity(),
|
||||
|
@ -65,6 +64,56 @@ class BugModel(Model):
|
|||
])
|
||||
|
||||
self.clf = xgboost.XGBClassifier(n_jobs=16)
|
||||
self.clf.set_params(tree_method='exact', predictor='cpu_predictor')
|
||||
|
||||
def get_bugbug_labels(self, kind='bug'):
    """Collect ``{bug_id: bool}`` labels for the 'bug' or 'regression' task.

    Labels come from three sources, in this order:

    1. ``labels.get_labels('bug_nobug')``: rows of (bug id, 'True'/'False').
       For kind 'bug' every row is used; for kind 'regression' only the
       'False' rows are used (as False).
    2. ``labels.get_labels('regression_bug_nobug')``: rows of (bug id,
       category).  For kind 'bug' anything but 'nobug' is True; for kind
       'regression' the 'bug_unknown_regression' rows are skipped and only
       'regression' is True.
    3. Augmentation from ``bugzilla.get_bugs()`` for bugs not labelled yet,
       based on their keywords, the 'cf_has_regression_range' field and,
       for kind 'regression', the removal of the 'regression' keyword in
       their history.

    Labels of bugs that ``bugzilla.get_bugs()`` did not yield are dropped
    from the returned mapping.
    """
    assert kind in ['bug', 'regression']

    wants_bug = kind == 'bug'
    wants_regression = kind == 'regression'
    labelled = {}

    for raw_id, category in labels.get_labels('bug_nobug'):
        assert category in ['True', 'False'], 'unexpected category {}'.format(category)
        if wants_bug:
            labelled[int(raw_id)] = category == 'True'
        elif wants_regression and category == 'False':
            # Something that is not a bug cannot be a regression either.
            labelled[int(raw_id)] = False

    for raw_id, category in labels.get_labels('regression_bug_nobug'):
        assert category in ['nobug', 'bug_unknown_regression', 'bug_no_regression', 'regression'], 'unexpected category {}'.format(category)
        if wants_bug:
            labelled[int(raw_id)] = category != 'nobug'
        elif wants_regression and category != 'bug_unknown_regression':
            labelled[int(raw_id)] = category == 'regression'

    # Augment labels by using bugs marked as 'regression' or 'feature', as
    # they are basically labelled.
    known_ids = set()
    for bug in bugzilla.get_bugs():
        key = int(bug['id'])
        known_ids.add(key)

        if key in labelled:
            continue

        keywords = bug['keywords']
        marked_regression = 'regression' in keywords or 'talos-regression' in keywords

        if marked_regression or bug.get('cf_has_regression_range') == 'yes':
            labelled[key] = True
        elif 'feature' in keywords:
            labelled[key] = False
        elif wants_regression:
            # The 'regression' keyword was removed at some point in the
            # bug's history: label it as not a regression.
            for event in bug['history']:
                for change in event['changes']:
                    if change['field_name'] == 'keywords' and change['removed'] == 'regression':
                        labelled[key] = False

    # Remove labels which belong to bugs for which we have no data.
    return {key: value for key, value in labelled.items() if key in known_ids}
|
||||
|
||||
def get_labels(self):
    """Return the labels of this model, i.e. ``get_bugbug_labels`` for kind 'bug'."""
    bug_labels = self.get_bugbug_labels('bug')
    return bug_labels
|
||||
|
||||
def get_feature_names(self):
|
||||
return ['data_' + name for name in self.data_vectorizer.get_feature_names()] +\
|
||||
|
|
|
@ -9,7 +9,7 @@ from sklearn.pipeline import FeatureUnion
|
|||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features
|
||||
from bugbug import labels
|
||||
from bugbug import bugzilla
|
||||
from bugbug.model import Model
|
||||
from bugbug.utils import DictSelector
|
||||
|
||||
|
@ -18,8 +18,6 @@ class QANeededModel(Model):
|
|||
def __init__(self, lemmatization=False):
|
||||
Model.__init__(self, lemmatization)
|
||||
|
||||
self.classes = labels.get_qa_needed_labels()
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.has_str(),
|
||||
bug_features.has_regression_range(),
|
||||
|
@ -65,6 +63,26 @@ class QANeededModel(Model):
|
|||
|
||||
self.clf = xgboost.XGBClassifier(n_jobs=16)
|
||||
|
||||
def get_labels(self):
    """Return a ``{bug_id: bool}`` mapping telling whether QA was requested.

    A bug from ``bugzilla.get_bugs()`` is labelled True when one of its
    history changes added a value starting with 'qawanted', or, for a change
    that did not, when the enclosing history entry has a 'flags' key holding
    a flag whose name starts with 'qe-verify'.  All other bugs are labelled
    False.
    """
    qa_needed = {}

    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])
        needs_qa = False

        for event in bug['history']:
            for change in event['changes']:
                if change['added'].startswith('qawanted'):
                    needs_qa = True
                elif 'flags' in event:
                    for flag in event['flags']:
                        if flag['name'].startswith('qe-verify'):
                            needs_qa = True

        if needs_qa:
            qa_needed[bug_id] = True
        else:
            # Keep a label recorded earlier for the same id, if any.
            qa_needed.setdefault(bug_id, False)

    return qa_needed
|
||||
|
||||
def get_feature_names(self):
|
||||
return ['data_' + name for name in self.data_vectorizer.get_feature_names()] +\
|
||||
['title_' + name for name in self.title_vectorizer.get_feature_names()] +\
|
||||
|
|
|
@ -3,80 +3,12 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import xgboost
|
||||
from sklearn.feature_extraction import DictVectorizer
|
||||
from sklearn.pipeline import FeatureUnion
|
||||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features
|
||||
from bugbug import labels
|
||||
from bugbug.model import Model
|
||||
from bugbug.utils import DictSelector
|
||||
from bugbug.models.bug import BugModel
|
||||
|
||||
|
||||
class RegressionModel(Model):
|
||||
class RegressionModel(BugModel):
|
||||
def __init__(self, lemmatization=False):
|
||||
Model.__init__(self, lemmatization)
|
||||
BugModel.__init__(self, lemmatization)
|
||||
|
||||
self.classes = labels.get_bugbug_labels(kind='regression', augmentation=True)
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.has_str(),
|
||||
bug_features.severity(),
|
||||
# Ignore keywords that would make the ML completely skewed
|
||||
# (we are going to use them as 100% rules in the evaluation phase).
|
||||
bug_features.keywords(set(['regression', 'talos-regression', 'feature'])),
|
||||
bug_features.is_coverity_issue(),
|
||||
bug_features.has_crash_signature(),
|
||||
bug_features.has_url(),
|
||||
bug_features.has_w3c_url(),
|
||||
bug_features.has_github_url(),
|
||||
bug_features.whiteboard(),
|
||||
bug_features.patches(),
|
||||
bug_features.landings(),
|
||||
bug_features.title(),
|
||||
bug_features.comments(),
|
||||
]
|
||||
|
||||
self.data_vectorizer = DictVectorizer()
|
||||
self.title_vectorizer = self.text_vectorizer(stop_words='english')
|
||||
self.comments_vectorizer = self.text_vectorizer(stop_words='english')
|
||||
|
||||
self.extraction_pipeline = Pipeline([
|
||||
('bug_extractor', bug_features.BugExtractor(feature_extractors)),
|
||||
('union', FeatureUnion(
|
||||
transformer_list=[
|
||||
('data', Pipeline([
|
||||
('selector', DictSelector(key='data')),
|
||||
('vect', self.data_vectorizer),
|
||||
])),
|
||||
|
||||
('title', Pipeline([
|
||||
('selector', DictSelector(key='title')),
|
||||
('tfidf', self.title_vectorizer),
|
||||
])),
|
||||
|
||||
('comments', Pipeline([
|
||||
('selector', DictSelector(key='comments')),
|
||||
('tfidf', self.comments_vectorizer),
|
||||
])),
|
||||
],
|
||||
)),
|
||||
])
|
||||
|
||||
self.clf = xgboost.XGBClassifier(n_jobs=16)
|
||||
self.clf.set_params(tree_method='exact', predictor='cpu_predictor')
|
||||
|
||||
def get_feature_names(self):
|
||||
return ['data_' + name for name in self.data_vectorizer.get_feature_names()] +\
|
||||
['title_' + name for name in self.title_vectorizer.get_feature_names()] +\
|
||||
['comments_' + name for name in self.comments_vectorizer.get_feature_names()]
|
||||
|
||||
def overwrite_classes(self, bugs, classes, probabilities):
|
||||
for i, bug in enumerate(bugs):
|
||||
if any(keyword in bug['keywords'] for keyword in ['regression', 'talos-regression']) or ('cf_has_regression_range' in bug and bug['cf_has_regression_range'] == 'yes'):
|
||||
classes[i] = 1 if not probabilities else [1., 0.]
|
||||
elif 'feature' in bug['keywords']:
|
||||
classes[i] = 0 if not probabilities else [0., 1.]
|
||||
|
||||
return classes
|
||||
def get_labels(self):
|
||||
return self.get_bugbug_labels('regression')
|
||||
|
|
|
@ -9,7 +9,7 @@ from sklearn.pipeline import FeatureUnion
|
|||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features
|
||||
from bugbug import labels
|
||||
from bugbug import bugzilla
|
||||
from bugbug.model import Model
|
||||
from bugbug.utils import DictSelector
|
||||
|
||||
|
@ -18,8 +18,6 @@ class TrackingModel(Model):
|
|||
def __init__(self, lemmatization=False):
|
||||
Model.__init__(self, lemmatization)
|
||||
|
||||
self.classes = labels.get_tracking_labels()
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.has_str(),
|
||||
bug_features.has_regression_range(),
|
||||
|
@ -65,6 +63,25 @@ class TrackingModel(Model):
|
|||
|
||||
self.clf = xgboost.XGBClassifier(n_jobs=16)
|
||||
|
||||
def get_labels(self):
    """Return a ``{bug_id: bool}`` mapping of tracking labels.

    For each bug from ``bugzilla.get_bugs()``, history changes on fields
    whose name starts with 'cf_tracking_firefox' decide the label: an added
    value of 'blocking' or '+' means True, '-' means False, and the last
    such change wins.  Bugs without a deciding change are labelled False.
    """
    tracked = {}

    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])
        verdict = None  # None until a tracking change decides the label

        for event in bug['history']:
            for change in event['changes']:
                if not change['field_name'].startswith('cf_tracking_firefox'):
                    continue

                if change['added'] in ['blocking', '+']:
                    verdict = True
                elif change['added'] == '-':
                    verdict = False

        if verdict is None:
            # Keep a label recorded earlier for the same id, if any.
            tracked.setdefault(bug_id, False)
        else:
            tracked[bug_id] = verdict

    return tracked
|
||||
|
||||
def get_feature_names(self):
|
||||
return ['data_' + name for name in self.data_vectorizer.get_feature_names()] +\
|
||||
['title_' + name for name in self.title_vectorizer.get_feature_names()] +\
|
||||
|
|
|
@ -9,7 +9,7 @@ from sklearn.pipeline import FeatureUnion
|
|||
from sklearn.pipeline import Pipeline
|
||||
|
||||
from bugbug import bug_features
|
||||
from bugbug import labels
|
||||
from bugbug import bugzilla
|
||||
from bugbug.model import Model
|
||||
from bugbug.utils import DictSelector
|
||||
|
||||
|
@ -18,8 +18,6 @@ class UpliftModel(Model):
|
|||
def __init__(self, lemmatization=False):
|
||||
Model.__init__(self, lemmatization)
|
||||
|
||||
self.classes = labels.get_uplift_labels()
|
||||
|
||||
feature_extractors = [
|
||||
bug_features.has_str(),
|
||||
bug_features.has_regression_range(),
|
||||
|
@ -65,6 +63,24 @@ class UpliftModel(Model):
|
|||
|
||||
self.clf = xgboost.XGBClassifier(n_jobs=16)
|
||||
|
||||
def get_labels(self):
    """Return a ``{bug_id: bool}`` mapping of uplift decisions.

    Attachment flags whose name starts with 'approval-mozilla-' label the
    bug True when their status is '+' and False when it is '-'; any other
    status is ignored and the last decided flag wins.  Bugs without such a
    flag are absent from the mapping.
    """
    uplifted = {}

    for bug in bugzilla.get_bugs():
        bug_id = int(bug['id'])

        for attachment in bug['attachments']:
            for flag in attachment['flags']:
                if not flag['name'].startswith('approval-mozilla-'):
                    continue

                status = flag['status']
                if status == '+':
                    uplifted[bug_id] = True
                elif status == '-':
                    uplifted[bug_id] = False

    return uplifted
|
||||
|
||||
def get_feature_names(self):
|
||||
return ['data_' + name for name in self.data_vectorizer.get_feature_names()] +\
|
||||
['title_' + name for name in self.title_vectorizer.get_feature_names()] +\
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from bugbug.models.bug import BugModel
|
||||
|
||||
|
||||
def test_get_tracking_labels():
    """Check ``BugModel().get_labels()`` on a few known bug ids."""
    classes = BugModel().get_labels()

    # (bug id, expected truthiness), in the order of the source CSV files.
    expectations = [
        # labels from bug_nobug.csv
        (1087488, True),
        (1101825, False),
        # labels from regression_bug_nobug.csv
        (447581, False),   # nobug
        (518272, True),    # regression
        (528988, True),    # bug_unknown_regression
        (1037762, True),   # bug_no_regression
    ]

    for bug_id, expected in expectations:
        assert bool(classes[bug_id]) is expected
|
|
@ -3,22 +3,16 @@
|
|||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
import os
|
||||
|
||||
from bugbug import labels
|
||||
|
||||
|
||||
def test_get_tracking_labels():
|
||||
classes = labels.get_tracking_labels()
|
||||
assert not classes[1101825]
|
||||
assert classes[1042096]
|
||||
def test_get_labels_dir():
|
||||
path = labels.get_labels_dir()
|
||||
assert os.path.isabs(path)
|
||||
assert path.endswith('labels')
|
||||
|
||||
|
||||
def test_get_labels():
|
||||
classes = labels.get_bugbug_labels()
|
||||
# labels from bug_nobug.csv
|
||||
assert classes[1087488]
|
||||
assert not classes[1101825]
|
||||
# labels from regression_bug_nobug.csv
|
||||
assert not classes[447581] # nobug
|
||||
assert classes[518272] # regression
|
||||
assert classes[528988] # bug_unknown_regression
|
||||
assert classes[1037762] # bug_no_regression
|
||||
def test_get_all_bug_ids():
|
||||
labels.get_all_bug_ids()
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from bugbug.models.tracking import TrackingModel
|
||||
|
||||
|
||||
def test_get_tracking_labels():
    """Check ``TrackingModel().get_labels()`` on one negative and one positive bug id."""
    tracking_labels = TrackingModel().get_labels()

    untracked_bug, tracked_bug = 1101825, 1042096
    assert not tracking_labels[untracked_bug]
    assert tracking_labels[tracked_bug]
|
Загрузка…
Ссылка в новой задаче