Refactor print statements to use logger (#3330)

ElusiveEllie 2023-03-14 12:54:15 -04:00 committed by GitHub
Parent 294286ae47
Commit 8936218ab2
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 305 additions and 189 deletions
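
Every file in this diff applies the same pattern: configure logging once at import time, create a module-level logger, and replace each print call with a lazy %-style logger call, so the message is only formatted when the record is actually emitted. A minimal before/after sketch of the pattern (the bug count is a hypothetical value, not taken from any file below):

from logging import INFO, basicConfig, getLogger

basicConfig(level=INFO)
logger = getLogger(__name__)

bug_count = 42  # hypothetical value, for illustration only

# Before: eager f-string formatting, always written to stdout
print(f"Loaded {bug_count} bugs.")

# After: lazy %-style formatting, routed through the logging framework
logger.info("Loaded %d bugs.", bug_count)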

View File

@@ -3,11 +3,16 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import dateutil.parser
from dateutil.relativedelta import relativedelta
from bugbug import bugzilla
basicConfig(level=INFO)
logger = getLogger(__name__)
def bool_str(val):
assert val in ["", "0", "1"], f"Unexpected boolean value: '{val}'"
@@ -536,7 +541,7 @@ def rollback(bug, when=None, do_assert=False):
if do_assert:
assert False, msg
else:
print(msg)
logger.error(msg)
def parse_flag_change(change):
parts = change.split("(")
@@ -869,9 +874,8 @@ def get_inconsistencies(bugs):
for bug in bugs:
try:
rollback(bug, do_assert=True)
except Exception as e:
print(bug["id"])
print(e)
except Exception:
logger.exception("Failed to rollback bug %s", bug["id"])
inconsistencies.append(bug)
return inconsistencies
@@ -888,6 +892,6 @@ if __name__ == "__main__":
for bug in tqdm(bugzilla.get_bugs()):
if args.verbose:
print(bug["id"])
logger.info(bug["id"])
rollback(bug, do_assert=True)
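
A detail worth noting in the rollback hunk above: logger.exception logs at ERROR level and appends the active traceback automatically, so the two separate prints (the bug id, then the exception) collapse into a single call. A minimal sketch of that behavior, using a stand-in failure:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

bug = {"id": 12345}  # hypothetical bug record

try:
    raise ValueError("inconsistent history")  # stand-in for a rollback failure
except Exception:
    # Emits the message at ERROR level, followed by the full traceback
    logger.exception("Failed to rollback bug %s", bug["id"])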

View File

@@ -7,6 +7,7 @@ import collections
import csv
import re
from datetime import datetime
from logging import INFO, basicConfig, getLogger
from typing import Iterable, Iterator, NewType, Optional
import tenacity
@@ -16,6 +17,9 @@ from tqdm import tqdm
from bugbug import db, utils
basicConfig(level=INFO)
logger = getLogger(__name__)
BugDict = NewType("BugDict", dict)
BUGS_DB = "data/bugs.json"
@@ -191,7 +195,7 @@ def download_bugs(bug_ids: Iterable[int], security: bool = False) -> list[BugDic
old_bug_count += 1
new_bug_ids_set.discard(int(bug["id"]))
print(f"Loaded {old_bug_count} bugs.")
logger.info("Loaded %d bugs.", old_bug_count)
new_bug_ids = sorted(list(new_bug_ids_set))
@@ -417,8 +421,11 @@ def calculate_maintenance_effectiveness_indicator(
"closed": {},
}
print(
f"Calculating maintenance effectiveness indicator for the {team} team from {from_date} to {to_date}"
logger.info(
"Calculating maintenance effectiveness indicator for the %s team from %s to %s",
team,
from_date,
to_date,
)
for severity in MAINTENANCE_EFFECTIVENESS_SEVERITY_WEIGHTS.keys():
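
A design note on the basicConfig(level=INFO) call this commit adds to many modules: basicConfig configures the root logger only if it has no handlers yet, so importing several of these modules is first-call-wins rather than an error. A small demonstration of that behavior:

import logging

logging.basicConfig(level=logging.INFO)
# A second call is a no-op unless force=True is passed (Python 3.8+)
logging.basicConfig(level=logging.DEBUG)

logging.getLogger(__name__).debug("not shown: the root logger is still at INFO")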

View File

@@ -5,6 +5,7 @@
import pickle
from collections import defaultdict
from logging import INFO, basicConfig, getLogger
from typing import Any
import matplotlib
@@ -29,6 +30,9 @@ from bugbug.github import Github
from bugbug.nlp import SpacyVectorizer
from bugbug.utils import split_tuple_generator, to_array
basicConfig(level=INFO)
logger = getLogger(__name__)
def classification_report_imbalanced_values(
y_true, y_pred, labels, target_names=None, sample_weight=None, digits=2, alpha=0.1
@@ -398,7 +402,7 @@ class Model:
self.clf.fit(X_train, self.le.transform(y_train))
print("Model trained")
logger.info("Model trained")
feature_names = self.get_human_readable_feature_names()
if self.calculate_importance and len(feature_names):

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bugzilla, commit_features, feature_cleanup, labels, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class AnnotateIgnoreModel(CommitModel):
def __init__(self, lemmatization: bool = False) -> None:
@@ -105,16 +110,14 @@ class AnnotateIgnoreModel(CommitModel):
for node, label in labels.get_labels("annotateignore"):
classes[node] = int(label)
print(
"{} commits that can be ignored".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits that can be ignored",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits that cannot be ignored".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits that cannot be ignored",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import Counter
from logging import INFO, basicConfig, getLogger
import xgboost
from sklearn.compose import ColumnTransformer
@@ -24,6 +25,9 @@ ADDRESSES_TO_EXCLUDE = [
"nobody@t4b.me",
]
basicConfig(level=INFO)
logger = getLogger(__name__)
class AssigneeModel(BugModel):
def __init__(self, lemmatization=False):
@@ -100,9 +104,9 @@ class AssigneeModel(BugModel):
if count > MINIMUM_ASSIGNMENTS
)
print(f"{len(top_assignees)} assignees")
logger.info("%d assignees", len(top_assignees))
for assignee, count in assignee_counts:
print(f"{assignee}: {count}")
logger.info("%s: %d", assignee, count)
classes = {
bug_id: assignee

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
from datetime import datetime
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import xgboost
@@ -16,6 +17,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, commit_features, feature_cleanup, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class BackoutModel(CommitModel):
def __init__(self, lemmatization=False, bug_data=False):
@@ -107,15 +111,13 @@ class BackoutModel(CommitModel):
classes[commit_data["node"]] = 1 if commit_data["backedoutby"] else 0
print(
"{} commits were backed out".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits were backed out",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits were not backed out".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits were not backed out",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -5,6 +5,7 @@
from collections import Counter
from datetime import datetime, timezone
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import xgboost
@@ -17,6 +18,9 @@ from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.bugzilla import get_product_component_count
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class ComponentModel(BugModel):
PRODUCTS = {
@@ -164,9 +168,9 @@
component_counts = Counter(classes.values()).most_common()
top_components = set(component for component, count in component_counts)
print(f"{len(top_components)} components")
logger.info("%d components", len(top_components))
for component, count in component_counts:
print(f"{component}: {count}")
logger.info("%s: %d", component, count)
# Assert there is at least one bug for each conflated component.
for conflated_component in self.CONFLATED_COMPONENTS:
@@ -242,14 +246,18 @@
full_comp = f"{product}::{component}"
if full_comp not in bugs_number.keys():
print(
f"Component {component!r} of product {product!r} doesn't exists, failure"
logger.warning(
"Component %r of product %r doesn't exists, failure",
component,
product,
)
success = False
elif bugs_number[full_comp] <= 0:
print(
f"Component {component!r} of product {product!r} have 0 bugs or less in it, failure"
logger.warning(
"Component %r of product %r have 0 bugs or less in it, failure",
component,
product,
)
success = False
@@ -265,7 +273,7 @@ class ComponentModel(BugModel):
]
if not matching_components:
print(f"{conflated_component} doesn't match any component")
logger.warning("%s doesn't match any component", conflated_component)
success = False
continue
@@ -276,8 +284,9 @@
]
if not matching_components_values:
print(
f"{conflated_component} should match at least one component with more than 0 bugs"
logger.warning(
"%s should match at least one component with more than 0 bugs",
conflated_component,
)
success = False
@@ -286,13 +295,15 @@
for full_comp in self.CONFLATED_COMPONENTS_MAPPING.values():
if full_comp not in bugs_number:
print(
f"{full_comp} from conflated component mapping doesn't exists, failure"
logger.warning(
"%s from conflated component mapping doesn't exists, failure",
full_comp,
)
success = False
elif bugs_number[full_comp] <= 0:
print(
f"{full_comp} from conflated component mapping have less than 1 bug, failure"
logger.warning(
"%s from conflated component mapping have less than 1 bug, failure",
full_comp,
)
success = False
@@ -309,7 +320,7 @@ class ComponentModel(BugModel):
]
if not (matching_components or in_mapping):
print(f"It should be possible to map {conflated_component}")
logger.warning("It should be possible to map %s", conflated_component)
success = False
continue
@@ -336,15 +347,16 @@
if not meaningful_product_components.issubset(
self.meaningful_product_components
):
print("Meaningful product components mismatch")
logger.warning("Meaningful product components mismatch")
new_meaningful_product_components = (
meaningful_product_components.difference(
self.meaningful_product_components
)
)
print(
f"New meaningful product components {new_meaningful_product_components!r}"
logger.info(
"New meaningful product components %r",
new_meaningful_product_components,
)
success = False

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import itertools
from logging import INFO, basicConfig, getLogger
from typing import Any
import xgboost
@@ -15,6 +16,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, labels, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class DefectModel(BugModel):
def __init__(self, lemmatization=False, historical=False):
@@ -254,8 +258,8 @@ class DefectModel(BugModel):
def get_labels(self) -> tuple[dict[int, Any], list[Any]]:
classes = self.get_bugbug_labels("bug")
print("{} bugs".format(sum(1 for label in classes.values() if label == 1)))
print("{} non-bugs".format(sum(1 for label in classes.values() if label == 0)))
logger.info("%d bugs", (sum(1 for label in classes.values() if label == 1)))
logger.info("%d non-bugs", (sum(1 for label in classes.values() if label == 0)))
return classes, [0, 1]

View File

@@ -3,10 +3,14 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from typing import Any
from bugbug.models.defect import DefectModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class DefectEnhancementTaskModel(DefectModel):
def __init__(self, lemmatization=False, historical=False):
@@ -17,19 +21,14 @@ class DefectEnhancementTaskModel(DefectModel):
def get_labels(self) -> tuple[dict[int, Any], list[Any]]:
classes = self.get_bugbug_labels("defect_enhancement_task")
print(
"{} defects".format(
sum(1 for label in classes.values() if label == "defect")
)
logger.info(
"%d defects", sum(1 for label in classes.values() if label == "defect")
)
print(
"{} enhancements".format(
sum(1 for label in classes.values() if label == "enhancement")
)
)
print(
"{} tasks".format(sum(1 for label in classes.values() if label == "task"))
logger.info(
"%d enhancements",
sum(1 for label in classes.values() if label == "enhancement"),
)
logger.info("%d tasks", sum(1 for label in classes.values() if label == "task"))
return classes, ["defect", "enhancement", "task"]

View File

@@ -3,10 +3,14 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from typing import Any
from bugbug.models.defect import DefectModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class RegressionModel(DefectModel):
def __init__(self, lemmatization=False, historical=False):
@@ -16,15 +20,11 @@ class RegressionModel(DefectModel):
def get_labels(self) -> tuple[dict[int, Any], list[int]]:
classes = self.get_bugbug_labels("regression")
print(
"{} regression bugs".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d regression bugs", sum(1 for label in classes.values() if label == 1)
)
print(
"{} non-regression bugs".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d non-regression bugs", sum(1 for label in classes.values() if label == 0)
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class RegressionRangeModel(BugModel):
def __init__(self, lemmatization=False):
@@ -73,15 +78,13 @@ class RegressionRangeModel(BugModel):
classes[bug_id] = 1
elif bug_data["cf_has_regression_range"] == "no":
classes[bug_id] = 0
print(
"{} bugs have regression range".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs have regression range",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} bugs don't have a regression range".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs don't have a regression range",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -5,6 +5,7 @@
import itertools
from datetime import datetime
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import numpy as np
@@ -18,6 +19,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bugzilla, commit_features, db, feature_cleanup, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
BUG_FIXING_COMMITS_DB = "data/bug_fixing_commits.json"
db.register(
BUG_FIXING_COMMITS_DB,
@@ -188,16 +192,14 @@ class RegressorModel(CommitModel):
classes[node] = 0
print(
"{} commits caused regressions".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits caused regressions",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits did not cause regressions".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits did not cause regressions",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]
@@ -224,9 +226,9 @@ class RegressorModel(CommitModel):
commits.append(commit_data)
print(f"{len(commits)} commits in the evaluation set")
logger.info("%d commits in the evaluation set", len(commits))
bugs_num = len(set(commit["bug_id"] for commit in commits))
print(f"{bugs_num} bugs in the evaluation set")
logger.info("%d bugs in the evaluation set", bugs_num)
# Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
commits.sort(key=lambda x: x["bug_id"])
@@ -247,7 +249,7 @@ class RegressorModel(CommitModel):
total_regressions = sum(1 for _, is_reg in results if is_reg)
average_regression_rate = total_regressions / total_landings
print(f"Average risk is {average_regression_rate}")
logger.info("Average risk is %d", average_regression_rate)
MIN_SAMPLE = 200

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class SpamBugModel(BugModel):
def __init__(self, lemmatization=False):
@@ -105,15 +110,13 @@ class SpamBugModel(BugModel):
elif bug_data["product"] == "Invalid Bugs":
classes[bug_id] = 1
print(
"{} bugs are classified as non-spam".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs are classified as non-spam",
sum(1 for label in classes.values() if label == 0),
)
print(
"{} bugs are classified as spam".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs are classified as spam",
sum(1 for label in classes.values() if label == 1),
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class StepsToReproduceModel(BugModel):
def __init__(self, lemmatization=False):
@@ -78,15 +83,13 @@ class StepsToReproduceModel(BugModel):
if change["removed"].startswith("stepswanted"):
classes[int(bug_data["id"])] = 1
print(
"{} bugs have no steps to reproduce".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs have no steps to reproduce",
sum(1 for label in classes.values() if label == 0),
)
print(
"{} bugs have steps to reproduce".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs have steps to reproduce",
sum(1 for label in classes.values() if label == 1),
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import commit_features, repository, test_scheduling, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class TestFailureModel(CommitModel):
def __init__(self, lemmatization=False):
@@ -96,15 +101,12 @@ class TestFailureModel(CommitModel):
else:
classes[rev] = 0
print(
"{} commits failed".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits failed", sum(1 for label in classes.values() if label == 1)
)
print(
"{} commits did not fail".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits did not fail",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -31,6 +31,7 @@ from bugbug import (
)
from bugbug.model import Model
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -497,8 +498,10 @@ class TestSelectModel(Model):
len(push["failures"]) + len(push["passes"])
for push in pushes[:train_push_len]
)
print(
f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
logger.info(
"%d pushes in the training set (corresponding to %d push/jobs)",
train_push_len,
train_len,
)
return X[:train_len], X[train_len:], y[:train_len], y[train_len:]
@@ -534,21 +537,17 @@ class TestSelectModel(Model):
for name in push["passes"]:
classes[(push["revs"][0], name)] = 0
print("{} pushes considered".format(len(pushes)))
print(
"{} pushes with at least one failure".format(
sum(1 for push in pushes if len(push["failures"]) > 0)
)
logger.info("%d pushes considered", len(pushes))
logger.info(
"%d pushes with at least one failure",
sum(1 for push in pushes if len(push["failures"]) > 0),
)
print(
"{} push/jobs failed".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d push/jobs failed", sum(1 for label in classes.values() if label == 1)
)
print(
"{} push/jobs did not fail".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d push/jobs did not fail",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]
@@ -595,7 +594,7 @@ class TestSelectModel(Model):
# To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
# only failure data from the training pushes (otherwise, we'd leak training information into the test
# set).
print("Generate failing together DB (restricted to training pushes)")
logger.info("Generate failing together DB (restricted to training pushes)")
push_data_iter, push_data_count, _ = test_scheduling.get_push_data(
"label" if self.granularity == "label" else "config_group"
)
@@ -644,12 +643,16 @@ class TestSelectModel(Model):
for push in test_pushes.values()
if "config_group_failures" not in push
)
print(
f"{missing_config_group_failures} pushes without config_group failures"
logger.info(
"%d pushes without config_group failures", missing_config_group_failures
)
print(
f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
logger.info(
"Testing on %d (%d with failures) out of %d. %d schedulable tasks.",
len(test_pushes),
test_pushes_failures,
len(pushes),
len(all_tasks),
)
del pushes
@@ -705,10 +708,8 @@ class TestSelectModel(Model):
for future in concurrent.futures.as_completed(futures):
exc = future.exception()
if exc is not None:
print(
"Exception {} while running {}".format(
exc, futures[future]["revs"][0]
)
logger.error(
"Exception %s while running %s", exc, futures[future]["revs"][0]
)
for f in futures:
f.cancel()
@@ -833,9 +834,11 @@ class TestSelectModel(Model):
and result["caught_percentage_config_group"] is not None
)
message += f" In {percentage_caught_one_config_group}% of pushes we caught at least one config/group failure. On average, we caught {average_caught_percentage_config_group}% of all seen config/group failures."
print(message)
logger.info(
"In %d%% of pushes we caught at least one config/group failure. On average, we caught %f%% of all seen config/group failures.",
percentage_caught_one_config_group,
average_caught_percentage_config_group,
)
with concurrent.futures.ProcessPoolExecutor(
max_workers=utils.get_physical_cpu_count(),

View File

@@ -6,11 +6,15 @@
import sys
from collections import defaultdict
from functools import lru_cache
from logging import INFO, basicConfig, getLogger
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
basicConfig(level=INFO)
logger = getLogger(__name__)
HAS_OPTIONAL_DEPENDENCIES = False
try:
@@ -26,11 +30,10 @@ try:
if HAS_OPTIONAL_DEPENDENCIES:
nlp = spacy.load("en_core_web_sm")
except OSError:
msg = (
"Spacy model is missing, install it with: "
f"{sys.executable} -m spacy download en_core_web_sm"
logger.error(
"Spacy model is missing, install it with: %s -m spacy download en_core_web_sm",
sys.executable,
)
print(msg, file=sys.stderr)
OPT_MSG_MISSING = (
"Optional dependencies are missing, install them with: pip install bugbug[nlp]\n"

View File

@@ -14,6 +14,7 @@ from tqdm import tqdm
from bugbug import db
from bugbug.db import LastModifiedNotAvailable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
RevisionDict = NewType("RevisionDict", dict)
@@ -126,14 +127,14 @@ def download_revisions(rev_ids: Collection[int]) -> None:
if rev["id"] in new_rev_ids:
new_rev_ids.remove(rev["id"])
print(f"Loaded {old_rev_count} revisions.")
logger.info("Loaded %d revisions.", old_rev_count)
new_rev_ids_list = sorted(list(new_rev_ids))
rev_ids_groups = (
new_rev_ids_list[i : i + 100] for i in range(0, len(new_rev_ids_list), 100)
)
print(f"{len(new_rev_ids_list)} revisions left to download")
logger.info("%d revisions left to download", len(new_rev_ids_list))
with tqdm(total=len(new_rev_ids)) as progress_bar:
for rev_ids_group in rev_ids_groups:

View File

@@ -235,15 +235,15 @@ class BaseSimilarity(abc.ABC):
apk.append(score / min(len(duplicates[bug["id"]]), 10))
print(f"Recall @ 1: {recall_rate_1/total_r * 100}%")
print(f"Recall @ 5: {recall_rate_5/total_r * 100}%")
print(f"Recall @ 10: {recall_rate_10/total_r * 100}%")
print(f"Precision @ 1: {precision_rate_1/queries * 100}%")
print(f"Precision @ 5: {precision_rate_5/queries * 100}%")
print(f"Precision @ 10: {precision_rate_10/queries * 100}%")
print(f"Recall: {hits_r/total_r * 100}%")
print(f"Precision: {hits_p/total_p * 100}%")
print(f"MAP@k : {np.mean(apk) * 100}%")
logger.info("Recall @ 1: %d%", recall_rate_1 / total_r * 100)
logger.info("Recall @ 5: %d%", recall_rate_5 / total_r * 100)
logger.info("Recall @ 10: %d%", recall_rate_10 / total_r * 100)
logger.info("Precision @ 1: %d%", precision_rate_1 / queries * 100)
logger.info("Precision @ 5: %d%", precision_rate_5 / queries * 100)
logger.info("Precision @ 10: %d%", precision_rate_10 / queries * 100)
logger.info("Recall: %d%", hits_r / total_r * 100)
logger.info("Precision: %d%", hits_p / total_p * 100)
logger.info("MAP@k : %d%", np.mean(apk) * 100)
@abc.abstractmethod
def get_distance(self, query1, query2):
@@ -407,7 +407,7 @@ class Word2VecWmdSimilarity(Word2VecSimilarityBase):
def wmdistance(self, document1, document2, all_distances, distance_metric="cosine"):
model = self.w2vmodel
if len(document1) == 0 or len(document2) == 0:
print(
logger.warning(
"At least one of the documents had no words that were in the vocabulary. Aborting (returning inf)."
)
return float("inf")
@@ -434,7 +434,9 @@ class Word2VecWmdSimilarity(Word2VecSimilarityBase):
distance_matrix[i, j] = all_distances[model.wv.vocab[t2].index, i]
if np.sum(distance_matrix) == 0.0:
print("The distance matrix is all zeros. Aborting (returning inf).")
logger.warning(
"The distance matrix is all zeros. Aborting (returning inf)."
)
return float("inf")
def nbow(document):
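
Because these messages use %-style formatting, a literal percent sign must be escaped as %% (a bare trailing % raises "ValueError: incomplete format" when the record is emitted), and %d would silently truncate a fractional percentage, which is why the metrics hunk above uses %f%%. A quick illustration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

recall = 87.5  # hypothetical recall percentage

logger.info("Recall @ 1: %f%%", recall)    # Recall @ 1: 87.500000%
logger.info("Recall @ 1: %.1f%%", recall)  # Recall @ 1: 87.5%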

View File

@@ -33,6 +33,7 @@ from tqdm import tqdm
from bugbug import db, repository
from bugbug.utils import ExpQueue, LMDBDict
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Revision = NewType("Revision", str)
@@ -281,8 +282,9 @@ def get_push_data(
)
)
print(
f"{manifest_combinations} possible combinations of manifests on configurations"
logger.info(
"%d possible combinations of manifests on configurations",
manifest_combinations,
)
return push_data_iter, push_data_count, all_runnables

View File

@@ -5,9 +5,13 @@
import os
import time
from logging import INFO, basicConfig, getLogger
import requests
basicConfig(level=INFO)
logger = getLogger(__name__)
BUGBUG_HTTP_SERVER = os.environ.get("BUGBUG_HTTP_SERVER", "http://localhost:8000/")
@@ -30,7 +34,7 @@ def integration_test_single():
if not response.ok:
raise Exception(f"Couldn't get an answer in {timeout} seconds: {response_json}")
print("Response for bug 1376406", response_json)
logger.info("Response for bug 1376406 %s", response_json)
assert response_json["class"] is not None
@@ -55,10 +59,10 @@ def integration_test_batch():
raise Exception(f"Couldn't get an answer in {timeout} seconds: {response_json}")
response_1376544 = response_json["bugs"]["1376544"]
print("Response for bug 1376544", response_1376544)
logger.info("Response for bug 1376544 %s", response_1376544)
assert response_1376544["class"] is not None
response_1376412 = response_json["bugs"]["1376412"]
print("Response for bug 1376412", response_1376412)
logger.info("Response for bug 1376412 %s", response_1376412)
assert response_1376412["class"] is not None

View File

@@ -21,13 +21,16 @@ This script triggers the data pipeline for the bugbug project
import argparse
import os
import sys
from logging import INFO, basicConfig, getLogger
import jsone
import requests.packages.urllib3
import taskcluster
import yaml
basicConfig(level=INFO)
logger = getLogger(__name__)
requests.packages.urllib3.disable_warnings()
TASKCLUSTER_DEFAULT_URL = "https://community-tc.services.mozilla.com"
@@ -128,9 +131,11 @@ def main():
for task_id, task_payload in tasks:
queue.createTask(task_id, task_payload)
print(f"https://community-tc.services.mozilla.com/tasks/groups/{task_group_id}")
except taskcluster.exceptions.TaskclusterAuthFailure as e:
print(f"TaskclusterAuthFailure: {e.body}", file=sys.stderr)
logger.info(
"https://community-tc.services.mozilla.com/tasks/groups/%s", task_group_id
)
except taskcluster.exceptions.TaskclusterAuthFailure:
logger.exception("Failed to authenticate with Taskcluster")
raise
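
The Taskcluster hunk above pairs logger.exception with a bare raise: the failure is recorded with its traceback at the point where the context is known, while the original exception still propagates to the caller. The same pattern in miniature, with a stand-in exception type:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_tasks():
    raise PermissionError("bad credentials")  # stand-in for TaskclusterAuthFailure

try:
    create_tasks()
except PermissionError:
    logger.exception("Failed to authenticate with Taskcluster")
    raise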

View File

@@ -4,12 +4,16 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import json
from logging import INFO, basicConfig, getLogger
from mozci.push import Push
from tqdm import tqdm
from bugbug import db, repository
basicConfig(level=INFO)
logger = getLogger(__name__)
def go() -> None:
assert db.download(repository.COMMITS_DB)
@@ -49,10 +53,10 @@ def go() -> None:
"group": group_regressions,
}
print(f"Likely labels for backouts: {likely_label_count}")
print(f"Likely groups for backouts: {likely_group_count}")
print(f"Possible labels for backouts: {possible_label_count}")
print(f"Possible groups for backouts: {possible_group_count}")
logger.info("Likely labels for backouts: %d", likely_label_count)
logger.info("Likely groups for backouts: %d", likely_group_count)
logger.info("Possible labels for backouts: %d", possible_label_count)
logger.info("Possible groups for backouts: %d", possible_group_count)
backedout_regressions = {}

View File

@@ -4,12 +4,16 @@ import argparse
import csv
import os
from datetime import datetime, timedelta
from logging import INFO, basicConfig, getLogger
import numpy as np
from bugbug import bugzilla
from bugbug.models import get_model_class
basicConfig(level=INFO)
logger = getLogger(__name__)
def generate_sheet(model_name: str, token: str, days: int, threshold: float) -> None:
model_file_name = f"{model_name}model"
@@ -25,7 +29,7 @@ def generate_sheet(model_name: str, token: str, days: int, threshold: float) ->
bug_ids = bugzilla.get_ids_between(datetime.utcnow() - timedelta(days))
bugs = bugzilla.get(bug_ids)
print(f"Classifying {len(bugs)} bugs...")
logger.info("Classifying %d bugs...", len(bugs))
rows = [["Bug", f"{model_name}(model)", model_name, "Title"]]

View File

@@ -9,6 +9,10 @@ import json
import os
import sys
from datetime import date, datetime, timedelta
from logging import INFO, basicConfig, getLogger
basicConfig(level=INFO)
logger = getLogger(__name__)
# Inject project path
sys.path.append("../")
@@ -61,16 +65,16 @@ def run_untriaged(untriaged_bugs):
bug["product"], bug["component"]
)
if not expected_component:
print("Skipping bug: {}".format(bug["id"]))
logger.info("Skipping bug: %s", bug["id"])
continue
if classifiable:
print("Classifying bug with ID: {}".format(bug["id"]))
logger.info("Classifying bug with ID: %s", bug["id"])
classification = model.classify(bug)[0]
print("Classified bug as: {}".format(classification))
logger.info("Classified bug as: %s", classification)
else:
print("Not classifiable bug: {}".format(bug["id"]))
logger.info("Not classifiable bug: %s", bug["id"])
correct_prediction = expected_component == classification
rows.append(

View File

@@ -48,7 +48,7 @@ def classify_issues(
issues = github.get_issues()
for issue in issues:
print(f'{issue["url"]} - {issue["title"]} ')
logger.info("%s - %s ", issue["url"], issue["title"])
if model.calculate_importance:
probas, importance = model.classify(
@@ -67,7 +67,7 @@ def classify_issues(
pred_class = model.le.inverse_transform([pred_index])[0]
else:
pred_class = "Positive" if pred_index == 1 else "Negative"
print(f"{pred_class} {probability}")
logger.info("%s %s", pred_class, probability)
input()

View File

@@ -4,13 +4,14 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
from logging import getLogger
from logging import INFO, basicConfig, getLogger
import dateutil.parser
from bugbug import bugzilla
from bugbug.utils import get_secret
basicConfig(level=INFO)
logger = getLogger(__name__)
@@ -44,7 +45,7 @@ def main():
"If you want to include security bugs too, please set the BUGBUG_BUGZILLA_TOKEN environment variable to your Bugzilla API key."
)
print(
logger.info(
round(
bugzilla.calculate_maintenance_effectiveness_indicator(
args.team,

View File

@@ -3,19 +3,24 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from bugbug import db, test_scheduling
basicConfig(level=INFO)
logger = getLogger(__name__)
def count(is_first_task, is_second_task):
assert db.download(test_scheduling.PUSH_DATA_LABEL_DB)
push_data = list(db.read(test_scheduling.PUSH_DATA_LABEL_DB))
print(f"Analyzing {len(push_data)} pushes...")
logger.info("Analyzing %d pushes...", len(push_data))
all_tasks = set(task for _, _, push_tasks, _, _ in push_data for task in push_tasks)
print(f"Considering {len(all_tasks)} tasks...")
logger.info("Considering %d tasks...", len(all_tasks))
count_runs = 0
count_any_of_the_two = 0
@@ -96,8 +101,12 @@ def main():
count_first_but_not_second,
count_second_but_not_first,
) = count(is_first_task, is_second_task)
print(
f"Out of {count_runs} runs, any of the two failed {count_any_of_the_two} times. The first exclusively failed {count_first_but_not_second} times, the second exclusively failed {count_second_but_not_first} times."
logger.info(
"Out of %d runs, any of the two failed %d times. The first exclusively failed %d times, the second exclusively failed %d times.",
count_runs,
count_any_of_the_two,
count_first_but_not_second,
count_second_but_not_first,
)

View File

@@ -19,6 +19,7 @@ from tqdm import tqdm
from bugbug import db, test_scheduling, utils
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -298,19 +299,32 @@ def print_uncaught(
if len(caught_by_scheduler[scheduler1]) == 0:
if scheduler2 is not None and scheduler2 not in caught_by_scheduler:
print(
f"{scheduler1} didn't catch any of the {len(regressions)} regressions on {rev}"
logger.info(
"%s didn't catch any of the %d regressions on %s",
scheduler1,
len(regressions),
rev,
)
elif scheduler2 is not None and len(caught_by_scheduler[scheduler2]) == 0:
print(
f"{scheduler1} and {scheduler2} didn't catch any of the {len(regressions)} regressions on {rev}"
logger.info(
"%s and %s didn't catch any of the %d regressions on %s",
scheduler1,
scheduler2,
len(regressions),
rev,
)
else:
print(
f"{scheduler1} didn't catch any of the {len(regressions)} regressions on {rev}, while {scheduler2} did"
logger.info(
"%s didn't catch any of the %d regressions on %s, while %s did",
scheduler1,
len(regressions),
rev,
scheduler2,
)
print(f"Regressions: {regressions}")
print(f"Scheduled by {scheduler1}: {scheduled_by_scheduler[scheduler1]}")
logger.info("Regressions: %s", regressions)
logger.info(
"Scheduled by %s: %s", scheduler1, scheduled_by_scheduler[scheduler1]
)
def main() -> None:

View File

@@ -4,10 +4,14 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import importlib
from logging import INFO, basicConfig, getLogger
from bugbug import model
from bugbug.models import MODELS, get_model_class
basicConfig(level=INFO)
logger = getLogger(__name__)
def test_import_all_models():
"""Try loading all defined models to ensure that their full qualified
@@ -18,7 +22,7 @@ def test_import_all_models():
if model_name == "component_nn" and not importlib.util.find_spec("tensorflow"):
continue
print("Try loading model", model_name)
logger.info("Try loading model %s", model_name)
get_model_class(model_name)

View File

@@ -9,6 +9,7 @@ import pickle
import shutil
import time
from datetime import datetime, timezone
from logging import INFO, basicConfig, getLogger
import hglib
import pytest
@@ -19,6 +20,9 @@ from dateutil.relativedelta import relativedelta
from bugbug import commit_features, repository, rust_code_analysis_server
basicConfig(level=INFO)
logger = getLogger(__name__)
@pytest.fixture
def fake_hg_repo(tmpdir):
@@ -2399,7 +2403,7 @@ void main() {
patch_data = rs_parsepatch.get_lines(patch)
print(patch_data)
logger.info(patch_data)
assert len(patch_data) == 1
@@ -2650,7 +2654,7 @@ void main() {
patch_data = rs_parsepatch.get_lines(patch)
print(patch_data)
logger.info(patch_data)
assert len(patch_data) == 1