Refactor print statements to use logger (#3330)

ElusiveEllie 2023-03-14 12:54:15 -04:00 committed by GitHub
Parent 294286ae47
Commit 8936218ab2
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
31 changed files with 305 additions and 189 deletions
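
Every file in this diff applies the same pattern: configure logging once at import time, create a module-level logger, and replace each print call with a lazy %-style logger call, so the message is only formatted when the record is actually emitted. A minimal before/after sketch of the pattern (the bug count is a hypothetical value, not taken from any file below):

from logging import INFO, basicConfig, getLogger

basicConfig(level=INFO)
logger = getLogger(__name__)

bug_count = 42  # hypothetical value, for illustration only

# Before: eager f-string formatting, always written to stdout
print(f"Loaded {bug_count} bugs.")

# After: lazy %-style formatting, routed through the logging framework
logger.info("Loaded %d bugs.", bug_count)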

View File

@@ -3,11 +3,16 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import dateutil.parser
from dateutil.relativedelta import relativedelta
from bugbug import bugzilla
basicConfig(level=INFO)
logger = getLogger(__name__)
def bool_str(val):
assert val in ["", "0", "1"], f"Unexpected boolean value: '{val}'"
@@ -536,7 +541,7 @@ def rollback(bug, when=None, do_assert=False):
if do_assert:
assert False, msg
else:
print(msg)
logger.error(msg)
def parse_flag_change(change):
parts = change.split("(")
@@ -869,9 +874,8 @@ def get_inconsistencies(bugs):
for bug in bugs:
try:
rollback(bug, do_assert=True)
except Exception as e:
print(bug["id"])
print(e)
except Exception:
logger.exception("Failed to rollback bug %s", bug["id"])
inconsistencies.append(bug)
return inconsistencies
@@ -888,6 +892,6 @@ if __name__ == "__main__":
for bug in tqdm(bugzilla.get_bugs()):
if args.verbose:
print(bug["id"])
logger.info(bug["id"])
rollback(bug, do_assert=True)
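
A detail worth noting in the rollback hunk above: logger.exception logs at ERROR level and appends the active traceback automatically, so the two separate prints (the bug id, then the exception) collapse into a single call. A minimal sketch of that behavior, using a stand-in failure:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

bug = {"id": 12345}  # hypothetical bug record

try:
    raise ValueError("inconsistent history")  # stand-in for a rollback failure
except Exception:
    # Emits the message at ERROR level, followed by the full traceback
    logger.exception("Failed to rollback bug %s", bug["id"])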

View File

@@ -7,6 +7,7 @@ import collections
import csv
import re
from datetime import datetime
from logging import INFO, basicConfig, getLogger
from typing import Iterable, Iterator, NewType, Optional
import tenacity
@@ -16,6 +17,9 @@ from tqdm import tqdm
from bugbug import db, utils
basicConfig(level=INFO)
logger = getLogger(__name__)
BugDict = NewType("BugDict", dict)
BUGS_DB = "data/bugs.json"
@@ -191,7 +195,7 @@ def download_bugs(bug_ids: Iterable[int], security: bool = False) -> list[BugDic
old_bug_count += 1
new_bug_ids_set.discard(int(bug["id"]))
print(f"Loaded {old_bug_count} bugs.")
logger.info("Loaded %d bugs.", old_bug_count)
new_bug_ids = sorted(list(new_bug_ids_set))
@@ -417,8 +421,11 @@ def calculate_maintenance_effectiveness_indicator(
"closed": {},
}
print(
f"Calculating maintenance effectiveness indicator for the {team} team from {from_date} to {to_date}"
logger.info(
"Calculating maintenance effectiveness indicator for the %s team from %s to %s",
team,
from_date,
to_date,
)
for severity in MAINTENANCE_EFFECTIVENESS_SEVERITY_WEIGHTS.keys():
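
A design note on the basicConfig(level=INFO) call this commit adds to many modules: basicConfig configures the root logger only if it has no handlers yet, so importing several of these modules is first-call-wins rather than an error. A small demonstration of that behavior:

import logging

logging.basicConfig(level=logging.INFO)
# A second call is a no-op unless force=True is passed (Python 3.8+)
logging.basicConfig(level=logging.DEBUG)

logging.getLogger(__name__).debug("not shown: the root logger is still at INFO")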

View File

@@ -5,6 +5,7 @@
import pickle
from collections import defaultdict
from logging import INFO, basicConfig, getLogger
from typing import Any
import matplotlib
@@ -29,6 +30,9 @@ from bugbug.github import Github
from bugbug.nlp import SpacyVectorizer
from bugbug.utils import split_tuple_generator, to_array
basicConfig(level=INFO)
logger = getLogger(__name__)
def classification_report_imbalanced_values(
y_true, y_pred, labels, target_names=None, sample_weight=None, digits=2, alpha=0.1
@@ -398,7 +402,7 @@ class Model:
self.clf.fit(X_train, self.le.transform(y_train))
print("Model trained")
logger.info("Model trained")
feature_names = self.get_human_readable_feature_names()
if self.calculate_importance and len(feature_names):

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bugzilla, commit_features, feature_cleanup, labels, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class AnnotateIgnoreModel(CommitModel):
def __init__(self, lemmatization: bool = False) -> None:
@@ -105,16 +110,14 @@ class AnnotateIgnoreModel(CommitModel):
for node, label in labels.get_labels("annotateignore"):
classes[node] = int(label)
print(
"{} commits that can be ignored".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits that can be ignored",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits that cannot be ignored".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits that cannot be ignored",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
from collections import Counter
from logging import INFO, basicConfig, getLogger
import xgboost
from sklearn.compose import ColumnTransformer
@@ -24,6 +25,9 @@ ADDRESSES_TO_EXCLUDE = [
"nobody@t4b.me",
]
basicConfig(level=INFO)
logger = getLogger(__name__)
class AssigneeModel(BugModel):
def __init__(self, lemmatization=False):
@@ -100,9 +104,9 @@ class AssigneeModel(BugModel):
if count > MINIMUM_ASSIGNMENTS
)
print(f"{len(top_assignees)} assignees")
logger.info("%d assignees", len(top_assignees))
for assignee, count in assignee_counts:
print(f"{assignee}: {count}")
logger.info("%s: %d", assignee, count)
classes = {
bug_id: assignee

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
from datetime import datetime
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import xgboost
@@ -16,6 +17,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, commit_features, feature_cleanup, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class BackoutModel(CommitModel):
def __init__(self, lemmatization=False, bug_data=False):
@@ -107,15 +111,13 @@ class BackoutModel(CommitModel):
classes[commit_data["node"]] = 1 if commit_data["backedoutby"] else 0
print(
"{} commits were backed out".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits were backed out",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits were not backed out".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits were not backed out",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -5,6 +5,7 @@
from collections import Counter
from datetime import datetime, timezone
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import xgboost
@@ -17,6 +18,9 @@ from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.bugzilla import get_product_component_count
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class ComponentModel(BugModel):
PRODUCTS = {
@@ -164,9 +168,9 @@
component_counts = Counter(classes.values()).most_common()
top_components = set(component for component, count in component_counts)
print(f"{len(top_components)} components")
logger.info("%d components", len(top_components))
for component, count in component_counts:
print(f"{component}: {count}")
logger.info("%s: %d", component, count)
# Assert there is at least one bug for each conflated component.
for conflated_component in self.CONFLATED_COMPONENTS:
@@ -242,14 +246,18 @@
full_comp = f"{product}::{component}"
if full_comp not in bugs_number.keys():
print(
f"Component {component!r} of product {product!r} doesn't exists, failure"
logger.warning(
"Component %r of product %r doesn't exists, failure",
component,
product,
)
success = False
elif bugs_number[full_comp] <= 0:
print(
f"Component {component!r} of product {product!r} have 0 bugs or less in it, failure"
logger.warning(
"Component %r of product %r have 0 bugs or less in it, failure",
component,
product,
)
success = False
@@ -265,7 +273,7 @@ class ComponentModel(BugModel):
]
if not matching_components:
print(f"{conflated_component} doesn't match any component")
logger.warning("%s doesn't match any component", conflated_component)
success = False
continue
@@ -276,8 +284,9 @@
]
if not matching_components_values:
print(
f"{conflated_component} should match at least one component with more than 0 bugs"
logger.warning(
"%s should match at least one component with more than 0 bugs",
conflated_component,
)
success = False
@@ -286,13 +295,15 @@
for full_comp in self.CONFLATED_COMPONENTS_MAPPING.values():
if full_comp not in bugs_number:
print(
f"{full_comp} from conflated component mapping doesn't exists, failure"
logger.warning(
"%s from conflated component mapping doesn't exists, failure",
full_comp,
)
success = False
elif bugs_number[full_comp] <= 0:
print(
f"{full_comp} from conflated component mapping have less than 1 bug, failure"
logger.warning(
"%s from conflated component mapping have less than 1 bug, failure",
full_comp,
)
success = False
@@ -309,7 +320,7 @@ class ComponentModel(BugModel):
]
if not (matching_components or in_mapping):
print(f"It should be possible to map {conflated_component}")
logger.warning("It should be possible to map %s", conflated_component)
success = False
continue
@@ -336,15 +347,16 @@
if not meaningful_product_components.issubset(
self.meaningful_product_components
):
print("Meaningful product components mismatch")
logger.warning("Meaningful product components mismatch")
new_meaningful_product_components = (
meaningful_product_components.difference(
self.meaningful_product_components
)
)
print(
f"New meaningful product components {new_meaningful_product_components!r}"
logger.info(
"New meaningful product components %r",
new_meaningful_product_components,
)
success = False

View File

@@ -4,6 +4,7 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import itertools
from logging import INFO, basicConfig, getLogger
from typing import Any
import xgboost
@@ -15,6 +16,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, labels, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class DefectModel(BugModel):
def __init__(self, lemmatization=False, historical=False):
@@ -254,8 +258,8 @@ class DefectModel(BugModel):
def get_labels(self) -> tuple[dict[int, Any], list[Any]]:
classes = self.get_bugbug_labels("bug")
print("{} bugs".format(sum(1 for label in classes.values() if label == 1)))
print("{} non-bugs".format(sum(1 for label in classes.values() if label == 0)))
logger.info("%d bugs", (sum(1 for label in classes.values() if label == 1)))
logger.info("%d non-bugs", (sum(1 for label in classes.values() if label == 0)))
return classes, [0, 1]

View File

@@ -3,10 +3,14 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from typing import Any
from bugbug.models.defect import DefectModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class DefectEnhancementTaskModel(DefectModel):
def __init__(self, lemmatization=False, historical=False):
@@ -17,19 +21,14 @@ class DefectEnhancementTaskModel(DefectModel):
def get_labels(self) -> tuple[dict[int, Any], list[Any]]:
classes = self.get_bugbug_labels("defect_enhancement_task")
print(
"{} defects".format(
sum(1 for label in classes.values() if label == "defect")
)
logger.info(
"%d defects", sum(1 for label in classes.values() if label == "defect")
)
print(
"{} enhancements".format(
sum(1 for label in classes.values() if label == "enhancement")
)
)
print(
"{} tasks".format(sum(1 for label in classes.values() if label == "task"))
logger.info(
"%d enhancements",
sum(1 for label in classes.values() if label == "enhancement"),
)
logger.info("%d tasks", sum(1 for label in classes.values() if label == "task"))
return classes, ["defect", "enhancement", "task"]

View File

@@ -3,10 +3,14 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from typing import Any
from bugbug.models.defect import DefectModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class RegressionModel(DefectModel):
def __init__(self, lemmatization=False, historical=False):
@@ -16,15 +20,11 @@ class RegressionModel(DefectModel):
def get_labels(self) -> tuple[dict[int, Any], list[int]]:
classes = self.get_bugbug_labels("regression")
print(
"{} regression bugs".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d regression bugs", sum(1 for label in classes.values() if label == 1)
)
print(
"{} non-regression bugs".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d non-regression bugs", sum(1 for label in classes.values() if label == 0)
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class RegressionRangeModel(BugModel):
def __init__(self, lemmatization=False):
@@ -73,15 +78,13 @@ class RegressionRangeModel(BugModel):
classes[bug_id] = 1
elif bug_data["cf_has_regression_range"] == "no":
classes[bug_id] = 0
print(
"{} bugs have regression range".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs have regression range",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} bugs don't have a regression range".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs don't have a regression range",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -5,6 +5,7 @@
import itertools
from datetime import datetime
from logging import INFO, basicConfig, getLogger
import dateutil.parser
import numpy as np
@@ -18,6 +19,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bugzilla, commit_features, db, feature_cleanup, repository, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
BUG_FIXING_COMMITS_DB = "data/bug_fixing_commits.json"
db.register(
BUG_FIXING_COMMITS_DB,
@@ -188,16 +192,14 @@ class RegressorModel(CommitModel):
classes[node] = 0
print(
"{} commits caused regressions".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits caused regressions",
sum(1 for label in classes.values() if label == 1),
)
print(
"{} commits did not cause regressions".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits did not cause regressions",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]
@@ -224,9 +226,9 @@ class RegressorModel(CommitModel):
commits.append(commit_data)
print(f"{len(commits)} commits in the evaluation set")
logger.info("%d commits in the evaluation set", len(commits))
bugs_num = len(set(commit["bug_id"] for commit in commits))
print(f"{bugs_num} bugs in the evaluation set")
logger.info("%d bugs in the evaluation set", bugs_num)
# Sort commits by bug ID, so we can use itertools.groupby to group them by bug ID.
commits.sort(key=lambda x: x["bug_id"])
@@ -247,7 +249,7 @@ class RegressorModel(CommitModel):
total_regressions = sum(1 for _, is_reg in results if is_reg)
average_regression_rate = total_regressions / total_landings
print(f"Average risk is {average_regression_rate}")
logger.info("Average risk is %d", average_regression_rate)
MIN_SAMPLE = 200

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.over_sampling import BorderlineSMOTE
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class SpamBugModel(BugModel):
def __init__(self, lemmatization=False):
@@ -105,15 +110,13 @@ class SpamBugModel(BugModel):
elif bug_data["product"] == "Invalid Bugs":
classes[bug_id] = 1
print(
"{} bugs are classified as non-spam".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs are classified as non-spam",
sum(1 for label in classes.values() if label == 0),
)
print(
"{} bugs are classified as spam".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs are classified as spam",
sum(1 for label in classes.values() if label == 1),
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import bug_features, bugzilla, feature_cleanup, utils
from bugbug.model import BugModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class StepsToReproduceModel(BugModel):
def __init__(self, lemmatization=False):
@@ -78,15 +83,13 @@ class StepsToReproduceModel(BugModel):
if change["removed"].startswith("stepswanted"):
classes[int(bug_data["id"])] = 1
print(
"{} bugs have no steps to reproduce".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d bugs have no steps to reproduce",
sum(1 for label in classes.values() if label == 0),
)
print(
"{} bugs have steps to reproduce".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d bugs have steps to reproduce",
sum(1 for label in classes.values() if label == 1),
)
return classes, [0, 1]

View File

@@ -3,6 +3,8 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
import xgboost
from imblearn.under_sampling import RandomUnderSampler
from sklearn.compose import ColumnTransformer
@@ -12,6 +14,9 @@ from sklearn.pipeline import Pipeline
from bugbug import commit_features, repository, test_scheduling, utils
from bugbug.model import CommitModel
basicConfig(level=INFO)
logger = getLogger(__name__)
class TestFailureModel(CommitModel):
def __init__(self, lemmatization=False):
@@ -96,15 +101,12 @@ class TestFailureModel(CommitModel):
else:
classes[rev] = 0
print(
"{} commits failed".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d commits failed", sum(1 for label in classes.values() if label == 1)
)
print(
"{} commits did not fail".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d commits did not fail",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]

View File

@@ -31,6 +31,7 @@ from bugbug import (
)
from bugbug.model import Model
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -497,8 +498,10 @@ class TestSelectModel(Model):
len(push["failures"]) + len(push["passes"])
for push in pushes[:train_push_len]
)
print(
f"{train_push_len} pushes in the training set (corresponding to {train_len} push/jobs)"
logger.info(
"%d pushes in the training set (corresponding to %d push/jobs)",
train_push_len,
train_len,
)
return X[:train_len], X[train_len:], y[:train_len], y[train_len:]
@@ -534,21 +537,17 @@ class TestSelectModel(Model):
for name in push["passes"]:
classes[(push["revs"][0], name)] = 0
print("{} pushes considered".format(len(pushes)))
print(
"{} pushes with at least one failure".format(
sum(1 for push in pushes if len(push["failures"]) > 0)
)
logger.info("%d pushes considered", len(pushes))
logger.info(
"%d pushes with at least one failure",
sum(1 for push in pushes if len(push["failures"]) > 0),
)
print(
"{} push/jobs failed".format(
sum(1 for label in classes.values() if label == 1)
)
logger.info(
"%d push/jobs failed", sum(1 for label in classes.values() if label == 1)
)
print(
"{} push/jobs did not fail".format(
sum(1 for label in classes.values() if label == 0)
)
logger.info(
"%d push/jobs did not fail",
sum(1 for label in classes.values() if label == 0),
)
return classes, [0, 1]
@@ -595,7 +594,7 @@ class TestSelectModel(Model):
# To evaluate the model with reductions enabled, we need to regenerate the failing together DB, using
# only failure data from the training pushes (otherwise, we'd leak training information into the test
# set).
print("Generate failing together DB (restricted to training pushes)")
logger.info("Generate failing together DB (restricted to training pushes)")
push_data_iter, push_data_count, _ = test_scheduling.get_push_data(
"label" if self.granularity == "label" else "config_group"
)
@@ -644,12 +643,16 @@ class TestSelectModel(Model):
for push in test_pushes.values()
if "config_group_failures" not in push
)
print(
f"{missing_config_group_failures} pushes without config_group failures"
logger.info(
"%d pushes without config_group failures", missing_config_group_failures
)
print(
f"Testing on {len(test_pushes)} ({test_pushes_failures} with failures) out of {len(pushes)}. {len(all_tasks)} schedulable tasks."
logger.info(
"Testing on %d (%d with failures) out of %d. %d schedulable tasks.",
len(test_pushes),
test_pushes_failures,
len(pushes),
len(all_tasks),
)
del pushes
@@ -705,10 +708,8 @@ class TestSelectModel(Model):
for future in concurrent.futures.as_completed(futures):
exc = future.exception()
if exc is not None:
print(
"Exception {} while running {}".format(
exc, futures[future]["revs"][0]
)
logger.error(
"Exception %s while running %s", exc, futures[future]["revs"][0]
)
for f in futures:
f.cancel()
@@ -833,9 +834,11 @@ class TestSelectModel(Model):
and result["caught_percentage_config_group"] is not None
)
message += f" In {percentage_caught_one_config_group}% of pushes we caught at least one config/group failure. On average, we caught {average_caught_percentage_config_group}% of all seen config/group failures."
print(message)
logger.info(
"In %d%% of pushes we caught at least one config/group failure. On average, we caught %f%% of all seen config/group failures.",
percentage_caught_one_config_group,
average_caught_percentage_config_group,
)
with concurrent.futures.ProcessPoolExecutor(
max_workers=utils.get_physical_cpu_count(),

View File

@@ -6,11 +6,15 @@
import sys
from collections import defaultdict
from functools import lru_cache
from logging import INFO, basicConfig, getLogger
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import TfidfVectorizer
basicConfig(level=INFO)
logger = getLogger(__name__)
HAS_OPTIONAL_DEPENDENCIES = False
try:
@@ -26,11 +30,10 @@ try:
if HAS_OPTIONAL_DEPENDENCIES:
nlp = spacy.load("en_core_web_sm")
except OSError:
msg = (
"Spacy model is missing, install it with: "
f"{sys.executable} -m spacy download en_core_web_sm"
logger.error(
"Spacy model is missing, install it with: %s -m spacy download en_core_web_sm",
sys.executable,
)
print(msg, file=sys.stderr)
OPT_MSG_MISSING = (
"Optional dependencies are missing, install them with: pip install bugbug[nlp]\n"

View File

@@ -14,6 +14,7 @@ from tqdm import tqdm
from bugbug import db
from bugbug.db import LastModifiedNotAvailable
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
RevisionDict = NewType("RevisionDict", dict)
@@ -126,14 +127,14 @@ def download_revisions(rev_ids: Collection[int]) -> None:
if rev["id"] in new_rev_ids:
new_rev_ids.remove(rev["id"])
print(f"Loaded {old_rev_count} revisions.")
logger.info("Loaded %d revisions.", old_rev_count)
new_rev_ids_list = sorted(list(new_rev_ids))
rev_ids_groups = (
new_rev_ids_list[i : i + 100] for i in range(0, len(new_rev_ids_list), 100)
)
print(f"{len(new_rev_ids_list)} revisions left to download")
logger.info("%d revisions left to download", len(new_rev_ids_list))
with tqdm(total=len(new_rev_ids)) as progress_bar:
for rev_ids_group in rev_ids_groups:

View File

@@ -235,15 +235,15 @@ class BaseSimilarity(abc.ABC):
apk.append(score / min(len(duplicates[bug["id"]]), 10))
print(f"Recall @ 1: {recall_rate_1/total_r * 100}%")
print(f"Recall @ 5: {recall_rate_5/total_r * 100}%")
print(f"Recall @ 10: {recall_rate_10/total_r * 100}%")
print(f"Precision @ 1: {precision_rate_1/queries * 100}%")
print(f"Precision @ 5: {precision_rate_5/queries * 100}%")
print(f"Precision @ 10: {precision_rate_10/queries * 100}%")
print(f"Recall: {hits_r/total_r * 100}%")
print(f"Precision: {hits_p/total_p * 100}%")
print(f"MAP@k : {np.mean(apk) * 100}%")
logger.info("Recall @ 1: %d%", recall_rate_1 / total_r * 100)
logger.info("Recall @ 5: %d%", recall_rate_5 / total_r * 100)
logger.info("Recall @ 10: %d%", recall_rate_10 / total_r * 100)
logger.info("Precision @ 1: %d%", precision_rate_1 / queries * 100)
logger.info("Precision @ 5: %d%", precision_rate_5 / queries * 100)
logger.info("Precision @ 10: %d%", precision_rate_10 / queries * 100)
logger.info("Recall: %d%", hits_r / total_r * 100)
logger.info("Precision: %d%", hits_p / total_p * 100)
logger.info("MAP@k : %d%", np.mean(apk) * 100)
@abc.abstractmethod
def get_distance(self, query1, query2):
@@ -407,7 +407,7 @@ class Word2VecWmdSimilarity(Word2VecSimilarityBase):
def wmdistance(self, document1, document2, all_distances, distance_metric="cosine"):
model = self.w2vmodel
if len(document1) == 0 or len(document2) == 0:
print(
logger.warning(
"At least one of the documents had no words that were in the vocabulary. Aborting (returning inf)."
)
return float("inf")
@@ -434,7 +434,9 @@ class Word2VecWmdSimilarity(Word2VecSimilarityBase):
distance_matrix[i, j] = all_distances[model.wv.vocab[t2].index, i]
if np.sum(distance_matrix) == 0.0:
print("The distance matrix is all zeros. Aborting (returning inf).")
logger.warning(
"The distance matrix is all zeros. Aborting (returning inf)."
)
return float("inf")
def nbow(document):
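
Because these messages use %-style formatting, a literal percent sign must be escaped as %% (a bare trailing % raises "ValueError: incomplete format" when the record is emitted), and %d would silently truncate a fractional percentage, which is why the metrics hunk above uses %f%%. A quick illustration:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

recall = 87.5  # hypothetical recall percentage

logger.info("Recall @ 1: %f%%", recall)    # Recall @ 1: 87.500000%
logger.info("Recall @ 1: %.1f%%", recall)  # Recall @ 1: 87.5%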

View File

@@ -33,6 +33,7 @@ from tqdm import tqdm
from bugbug import db, repository
from bugbug.utils import ExpQueue, LMDBDict
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
Revision = NewType("Revision", str)
@@ -281,8 +282,9 @@ def get_push_data(
)
)
print(
f"{manifest_combinations} possible combinations of manifests on configurations"
logger.info(
"%d possible combinations of manifests on configurations",
manifest_combinations,
)
return push_data_iter, push_data_count, all_runnables

View File

@@ -5,9 +5,13 @@
import os
import time
from logging import INFO, basicConfig, getLogger
import requests
basicConfig(level=INFO)
logger = getLogger(__name__)
BUGBUG_HTTP_SERVER = os.environ.get("BUGBUG_HTTP_SERVER", "http://localhost:8000/")
@@ -30,7 +34,7 @@ def integration_test_single():
if not response.ok:
raise Exception(f"Couldn't get an answer in {timeout} seconds: {response_json}")
print("Response for bug 1376406", response_json)
logger.info("Response for bug 1376406 %s", response_json)
assert response_json["class"] is not None
@@ -55,10 +59,10 @@ def integration_test_batch():
raise Exception(f"Couldn't get an answer in {timeout} seconds: {response_json}")
response_1376544 = response_json["bugs"]["1376544"]
print("Response for bug 1376544", response_1376544)
logger.info("Response for bug 1376544 %s", response_1376544)
assert response_1376544["class"] is not None
response_1376412 = response_json["bugs"]["1376412"]
print("Response for bug 1376412", response_1376412)
logger.info("Response for bug 1376412 %s", response_1376412)
assert response_1376412["class"] is not None

View File

@@ -21,13 +21,16 @@ This script triggers the data pipeline for the bugbug project
import argparse
import os
import sys
from logging import INFO, basicConfig, getLogger
import jsone
import requests.packages.urllib3
import taskcluster
import yaml
basicConfig(level=INFO)
logger = getLogger(__name__)
requests.packages.urllib3.disable_warnings()
TASKCLUSTER_DEFAULT_URL = "https://community-tc.services.mozilla.com"
@@ -128,9 +131,11 @@ def main():
for task_id, task_payload in tasks:
queue.createTask(task_id, task_payload)
print(f"https://community-tc.services.mozilla.com/tasks/groups/{task_group_id}")
except taskcluster.exceptions.TaskclusterAuthFailure as e:
print(f"TaskclusterAuthFailure: {e.body}", file=sys.stderr)
logger.info(
"https://community-tc.services.mozilla.com/tasks/groups/%s", task_group_id
)
except taskcluster.exceptions.TaskclusterAuthFailure:
logger.exception("Failed to authenticate with Taskcluster")
raise
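
The Taskcluster hunk above pairs logger.exception with a bare raise: the failure is recorded with its traceback at the point where the context is known, while the original exception still propagates to the caller. The same pattern in miniature, with a stand-in exception type:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def create_tasks():
    raise PermissionError("bad credentials")  # stand-in for TaskclusterAuthFailure

try:
    create_tasks()
except PermissionError:
    logger.exception("Failed to authenticate with Taskcluster")
    raise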

View File

@@ -4,12 +4,16 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import json
from logging import INFO, basicConfig, getLogger
from mozci.push import Push
from tqdm import tqdm
from bugbug import db, repository
basicConfig(level=INFO)
logger = getLogger(__name__)
def go() -> None:
assert db.download(repository.COMMITS_DB)
@@ -49,10 +53,10 @@ def go() -> None:
"group": group_regressions,
}
print(f"Likely labels for backouts: {likely_label_count}")
print(f"Likely groups for backouts: {likely_group_count}")
print(f"Possible labels for backouts: {possible_label_count}")
print(f"Possible groups for backouts: {possible_group_count}")
logger.info("Likely labels for backouts: %d", likely_label_count)
logger.info("Likely groups for backouts: %d", likely_group_count)
logger.info("Possible labels for backouts: %d", possible_label_count)
logger.info("Possible groups for backouts: %d", possible_group_count)
backedout_regressions = {}

View File

@@ -4,12 +4,16 @@ import argparse
import csv
import os
from datetime import datetime, timedelta
from logging import INFO, basicConfig, getLogger
import numpy as np
from bugbug import bugzilla
from bugbug.models import get_model_class
basicConfig(level=INFO)
logger = getLogger(__name__)
def generate_sheet(model_name: str, token: str, days: int, threshold: float) -> None:
model_file_name = f"{model_name}model"
@@ -25,7 +29,7 @@ def generate_sheet(model_name: str, token: str, days: int, threshold: float) ->
bug_ids = bugzilla.get_ids_between(datetime.utcnow() - timedelta(days))
bugs = bugzilla.get(bug_ids)
print(f"Classifying {len(bugs)} bugs...")
logger.info("Classifying %d bugs...", len(bugs))
rows = [["Bug", f"{model_name}(model)", model_name, "Title"]]

View File

@@ -9,6 +9,10 @@ import json
import os
import sys
from datetime import date, datetime, timedelta
from logging import INFO, basicConfig, getLogger
basicConfig(level=INFO)
logger = getLogger(__name__)
# Inject project path
sys.path.append("../")
@@ -61,16 +65,16 @@ def run_untriaged(untriaged_bugs):
bug["product"], bug["component"]
)
if not expected_component:
print("Skipping bug: {}".format(bug["id"]))
logger.info("Skipping bug: %s", bug["id"])
continue
if classifiable:
print("Classifying bug with ID: {}".format(bug["id"]))
logger.info("Classifying bug with ID: %s", bug["id"])
classification = model.classify(bug)[0]
print("Classified bug as: {}".format(classification))
logger.info("Classified bug as: %s", classification)
else:
print("Not classifiable bug: {}".format(bug["id"]))
logger.info("Not classifiable bug: %s", bug["id"])
correct_prediction = expected_component == classification
rows.append(

View File

@@ -48,7 +48,7 @@ def classify_issues(
issues = github.get_issues()
for issue in issues:
print(f'{issue["url"]} - {issue["title"]} ')
logger.info("%s - %s ", issue["url"], issue["title"])
if model.calculate_importance:
probas, importance = model.classify(
@@ -67,7 +67,7 @@ def classify_issues(
pred_class = model.le.inverse_transform([pred_index])[0]
else:
pred_class = "Positive" if pred_index == 1 else "Negative"
print(f"{pred_class} {probability}")
logger.info("%s %s", pred_class, probability)
input()

View File

@@ -4,13 +4,14 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
from logging import getLogger
from logging import INFO, basicConfig, getLogger
import dateutil.parser
from bugbug import bugzilla
from bugbug.utils import get_secret
basicConfig(level=INFO)
logger = getLogger(__name__)
@@ -44,7 +45,7 @@ def main():
"If you want to include security bugs too, please set the BUGBUG_BUGZILLA_TOKEN environment variable to your Bugzilla API key."
)
print(
logger.info(
round(
bugzilla.calculate_maintenance_effectiveness_indicator(
args.team,

View File

@@ -3,19 +3,24 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import INFO, basicConfig, getLogger
from bugbug import db, test_scheduling
basicConfig(level=INFO)
logger = getLogger(__name__)
def count(is_first_task, is_second_task):
assert db.download(test_scheduling.PUSH_DATA_LABEL_DB)
push_data = list(db.read(test_scheduling.PUSH_DATA_LABEL_DB))
print(f"Analyzing {len(push_data)} pushes...")
logger.info("Analyzing %d pushes...", len(push_data))
all_tasks = set(task for _, _, push_tasks, _, _ in push_data for task in push_tasks)
print(f"Considering {len(all_tasks)} tasks...")
logger.info("Considering %d tasks...", len(all_tasks))
count_runs = 0
count_any_of_the_two = 0
@@ -96,8 +101,12 @@ def main():
count_first_but_not_second,
count_second_but_not_first,
) = count(is_first_task, is_second_task)
print(
f"Out of {count_runs} runs, any of the two failed {count_any_of_the_two} times. The first exclusively failed {count_first_but_not_second} times, the second exclusively failed {count_second_but_not_first} times."
logger.info(
"Out of %d runs, any of the two failed %d times. The first exclusively failed %d times, the second exclusively failed %d times.",
count_runs,
count_any_of_the_two,
count_first_but_not_second,
count_second_but_not_first,
)

View File

@@ -19,6 +19,7 @@ from tqdm import tqdm
from bugbug import db, test_scheduling, utils
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
@@ -298,19 +299,32 @@ def print_uncaught(
if len(caught_by_scheduler[scheduler1]) == 0:
if scheduler2 is not None and scheduler2 not in caught_by_scheduler:
print(
f"{scheduler1} didn't catch any of the {len(regressions)} regressions on {rev}"
logger.info(
"%s didn't catch any of the %d regressions on %s",
scheduler1,
len(regressions),
rev,
)
elif scheduler2 is not None and len(caught_by_scheduler[scheduler2]) == 0:
print(
f"{scheduler1} and {scheduler2} didn't catch any of the {len(regressions)} regressions on {rev}"
logger.info(
"%s and %s didn't catch any of the %d regressions on %s",
scheduler1,
scheduler2,
len(regressions),
rev,
)
else:
print(
f"{scheduler1} didn't catch any of the {len(regressions)} regressions on {rev}, while {scheduler2} did"
logger.info(
"%s didn't catch any of the %d regressions on %s, while %s did",
scheduler1,
len(regressions),
rev,
scheduler2,
)
print(f"Regressions: {regressions}")
print(f"Scheduled by {scheduler1}: {scheduled_by_scheduler[scheduler1]}")
logger.info("Regressions: %s", regressions)
logger.info(
"Scheduled by %s: %s", scheduler1, scheduled_by_scheduler[scheduler1]
)
def main() -> None:

View File

@@ -4,10 +4,14 @@
# You can obtain one at http://mozilla.org/MPL/2.0/.
import importlib
from logging import INFO, basicConfig, getLogger
from bugbug import model
from bugbug.models import MODELS, get_model_class
basicConfig(level=INFO)
logger = getLogger(__name__)
def test_import_all_models():
"""Try loading all defined models to ensure that their full qualified
@@ -18,7 +22,7 @@ def test_import_all_models():
if model_name == "component_nn" and not importlib.util.find_spec("tensorflow"):
continue
print("Try loading model", model_name)
logger.info("Try loading model %s", model_name)
get_model_class(model_name)

View File

@@ -9,6 +9,7 @@ import pickle
import shutil
import time
from datetime import datetime, timezone
from logging import INFO, basicConfig, getLogger
import hglib
import pytest
@@ -19,6 +20,9 @@ from dateutil.relativedelta import relativedelta
from bugbug import commit_features, repository, rust_code_analysis_server
basicConfig(level=INFO)
logger = getLogger(__name__)
@pytest.fixture
def fake_hg_repo(tmpdir):
@@ -2399,7 +2403,7 @@ void main() {
patch_data = rs_parsepatch.get_lines(patch)
print(patch_data)
logger.info(patch_data)
assert len(patch_data) == 1
@@ -2650,7 +2654,7 @@ void main() {
patch_data = rs_parsepatch.get_lines(patch)
print(patch_data)
logger.info(patch_data)
assert len(patch_data) == 1