зеркало из https://github.com/mozilla/bugbug.git
Add a 'types' field to the changes artifact which specifies the bug types
This commit is contained in:
Родитель
419cadf90f
Коммит
773cdcb7c6
|
@ -3,6 +3,8 @@
|
||||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
|
||||||
|
from typing import Dict, Iterable, List, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import xgboost
|
import xgboost
|
||||||
from sklearn.compose import ColumnTransformer
|
from sklearn.compose import ColumnTransformer
|
||||||
|
@ -28,7 +30,17 @@ KEYWORD_DICT = {
|
||||||
"crashreportid": "crash",
|
"crashreportid": "crash",
|
||||||
"perf": "performance",
|
"perf": "performance",
|
||||||
}
|
}
|
||||||
KEYWORD_LIST = sorted(set(KEYWORD_DICT.values()))
|
TYPE_LIST = sorted(set(KEYWORD_DICT.values()))
|
||||||
|
|
||||||
|
|
||||||
|
def bug_to_types(bug: bugzilla.BugDict) -> List[str]:
    """Return the bug types implied by the keywords set on *bug*.

    Each entry of ``bug["keywords"]`` that is a key of ``KEYWORD_DICT`` is
    translated to its mapped type; keywords that are not in ``KEYWORD_DICT``
    are ignored. Every type appears at most once in the result.

    Args:
        bug: A bug dictionary; only its ``"keywords"`` entry is read.

    Returns:
        The distinct types in ascending (alphabetical) order, or an empty
        list when no keyword maps to a type.
    """
    # Collect into a set first so that several keywords mapping to the same
    # type yield a single entry.
    types = {
        KEYWORD_DICT[keyword]
        for keyword in bug["keywords"]
        if keyword in KEYWORD_DICT
    }
    # Sort instead of calling list(): iteration order of a set of strings
    # varies between interpreter runs (hash randomization), which would make
    # anything serialized from this list differ from run to run.
    return sorted(types)
|
||||||
|
|
||||||
|
|
||||||
class BugTypeModel(BugModel):
|
class BugTypeModel(BugModel):
|
||||||
|
@ -96,29 +108,32 @@ class BugTypeModel(BugModel):
|
||||||
xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
|
xgboost.XGBClassifier(n_jobs=utils.get_physical_cpu_count())
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_labels(self):
|
def get_labels(self) -> Tuple[Dict[int, np.ndarray], List[str]]:
|
||||||
classes = {}
|
classes = {}
|
||||||
|
|
||||||
for bug_data in bugzilla.get_bugs():
|
for bug_data in bugzilla.get_bugs():
|
||||||
target = np.zeros(len(KEYWORD_LIST))
|
target = np.zeros(len(TYPE_LIST))
|
||||||
for keyword in bug_data["keywords"]:
|
for type_ in bug_to_types(bug_data):
|
||||||
if keyword in KEYWORD_DICT:
|
target[TYPE_LIST.index(type_)] = 1
|
||||||
target[KEYWORD_LIST.index(KEYWORD_DICT[keyword])] = 1
|
|
||||||
|
|
||||||
classes[int(bug_data["id"])] = target
|
classes[int(bug_data["id"])] = target
|
||||||
|
|
||||||
return classes, KEYWORD_LIST
|
return classes, TYPE_LIST
|
||||||
|
|
||||||
def get_feature_names(self):
|
def get_feature_names(self):
|
||||||
return self.extraction_pipeline.named_steps["union"].get_feature_names()
|
return self.extraction_pipeline.named_steps["union"].get_feature_names()
|
||||||
|
|
||||||
def overwrite_classes(self, bugs, classes, probabilities):
|
def overwrite_classes(
|
||||||
|
self,
|
||||||
|
bugs: Iterable[bugzilla.BugDict],
|
||||||
|
classes: Dict[int, np.ndarray],
|
||||||
|
probabilities: bool,
|
||||||
|
):
|
||||||
for i, bug in enumerate(bugs):
|
for i, bug in enumerate(bugs):
|
||||||
for keyword in bug["keywords"]:
|
for type_ in bug_to_types(bug):
|
||||||
if keyword in KEYWORD_LIST:
|
|
||||||
if probabilities:
|
if probabilities:
|
||||||
classes[i][KEYWORD_LIST.index(keyword)] = 1.0
|
classes[i][TYPE_LIST.index(type_)] = 1.0
|
||||||
else:
|
else:
|
||||||
classes[i][KEYWORD_LIST.index(keyword)] = 1
|
classes[i][TYPE_LIST.index(type_)] = 1
|
||||||
|
|
||||||
return classes
|
return classes
|
||||||
|
|
|
@ -17,6 +17,7 @@ import requests
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from bugbug import bugzilla, db, phabricator, repository, test_scheduling
|
from bugbug import bugzilla, db, phabricator, repository, test_scheduling
|
||||||
|
from bugbug.models.bugtype import bug_to_types
|
||||||
from bugbug.models.regressor import BUG_FIXING_COMMITS_DB, RegressorModel
|
from bugbug.models.regressor import BUG_FIXING_COMMITS_DB, RegressorModel
|
||||||
from bugbug.utils import (
|
from bugbug.utils import (
|
||||||
download_check_etag,
|
download_check_etag,
|
||||||
|
@ -469,6 +470,7 @@ class LandingsRiskReportGenerator(object):
|
||||||
component_team_mapping, bug["product"], bug["component"]
|
component_team_mapping, bug["product"], bug["component"]
|
||||||
),
|
),
|
||||||
"summary": bug["summary"],
|
"summary": bug["summary"],
|
||||||
|
"types": bug_to_types(bug),
|
||||||
"creation_date": dateutil.parser.parse(bug["creation_time"]).strftime(
|
"creation_date": dateutil.parser.parse(bug["creation_time"]).strftime(
|
||||||
"%Y-%m-%d"
|
"%Y-%m-%d"
|
||||||
),
|
),
|
||||||
|
|
Загрузка…
Ссылка в новой задаче