When retrieving past failure data for a TOML manifest, fall back on data from the homonymous INI manifest (#3691)

If we don't, we would consider TOML manifests as new, which would negatively affect the test selection model.
This commit is contained in:
Marco Castelluccio 2023-10-06 15:15:40 +02:00 коммит произвёл GitHub
Родитель 30d4f4e31d
Коммит b37623c74d
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 108 добавлений и 45 удалений

Просмотреть файл

@ -175,14 +175,13 @@ def _get_equivalence_sets(min_redundancy_confidence: float):
with open(f"equivalence_sets_{min_redundancy_confidence}.pickle", "rb") as fr:
return pickle.load(fr)
except FileNotFoundError:
past_failures_data = test_scheduling.get_past_failures("group", True)
all_runnables = past_failures_data["all_runnables"]
past_failures_data = test_scheduling.PastFailures("group", True)
equivalence_sets = {}
failing_together = test_scheduling.get_failing_together_db("config_group", True)
all_configs = pickle.loads(failing_together[b"$ALL_CONFIGS$"])
configs_by_group = pickle.loads(failing_together[b"$CONFIGS_BY_GROUP$"])
for group in all_runnables:
for group in past_failures_data.all_runnables:
key = test_scheduling.failing_together_key(group)
try:
failing_together_stats = pickle.loads(failing_together[key])
@ -561,11 +560,10 @@ class TestSelectModel(Model):
) -> dict[str, float]:
commit_data = commit_features.merge_commits(commits)
past_failures_data = test_scheduling.get_past_failures(self.granularity, True)
past_failures_data = test_scheduling.PastFailures(self.granularity, True)
if push_num is None:
push_num = past_failures_data["push_num"] + 1
all_runnables = past_failures_data["all_runnables"]
push_num = past_failures_data.push_num + 1
commit_tests = []
for data in test_scheduling.generate_data(
@ -573,7 +571,7 @@ class TestSelectModel(Model):
past_failures_data,
commit_data,
push_num,
all_runnables,
past_failures_data.all_runnables,
tuple(),
tuple(),
):
@ -660,8 +658,8 @@ class TestSelectModel(Model):
commit_map = get_commit_map(all_revs)
past_failures_data = test_scheduling.get_past_failures(self.granularity, True)
last_push_num = past_failures_data["push_num"]
past_failures_data = test_scheduling.PastFailures(self.granularity, True)
last_push_num = past_failures_data.push_num
past_failures_data.close()
# Select tests for all the pushes in the test set.

Просмотреть файл

@ -310,21 +310,62 @@ def get_test_scheduling_history(granularity):
yield obj["revs"], obj["data"]
def get_past_failures(granularity, readonly):
if granularity == "label":
past_failures_db = os.path.join("data", PAST_FAILURES_LABEL_DB)
elif granularity == "group":
past_failures_db = os.path.join("data", PAST_FAILURES_GROUP_DB)
elif granularity == "config_group":
past_failures_db = os.path.join("data", PAST_FAILURES_CONFIG_GROUP_DB)
else:
raise UnexpectedGranularityError(granularity)
class PastFailures:
    """Shelve-backed store of per-runnable past-failure statistics.

    Wraps an LMDB-backed ``shelve.Shelf`` and, for the "group"
    granularity, transparently falls back on the homonymous ``.ini``
    entry when a ``.toml`` key is missing, so manifests migrated from
    INI to TOML keep their failure history (#3691).
    """

    def __init__(self, granularity, readonly):
        # Resolve the on-disk DB for the requested granularity.
        if granularity == "label":
            past_failures_db = os.path.join("data", PAST_FAILURES_LABEL_DB)
        elif granularity == "group":
            past_failures_db = os.path.join("data", PAST_FAILURES_GROUP_DB)
        elif granularity == "config_group":
            assert False, "config_group granularity not supported for past failures"
        else:
            raise UnexpectedGranularityError(granularity)
        self.granularity = granularity
        # The DB path constant names the packed archive; the shelf works
        # on the unpacked directory, so strip the ".tar.zst" suffix.
        self.db = shelve.Shelf(
            LMDBDict(past_failures_db[: -len(".tar.zst")], readonly=readonly),
            protocol=pickle.DEFAULT_PROTOCOL,
            writeback=not readonly,
        )

    @property
    def push_num(self) -> int:
        # Raises KeyError when the DB has never been populated.
        return self.db["push_num"]

    @push_num.setter
    def push_num(self, value: int) -> None:
        self.db["push_num"] = value

    @property
    def all_runnables(self):
        return self.db["all_runnables"]

    @all_runnables.setter
    def all_runnables(self, value) -> None:
        self.db["all_runnables"] = value

    def get(self, key: str) -> "ExpQueue | None":
        """Return the stored queue for *key*, or None when absent.

        For "group" granularity, a missing ``.toml`` key falls back on
        the homonymous ``.ini`` entry (#3691); the found value is cached
        under the ``.toml`` key so the fallback happens only once.
        """
        value = self.db.get(key, None)
        if value is None and self.granularity == "group" and key.endswith(".toml"):
            # "foo.toml"[:-4] == "foo." -> append "ini" to get "foo.ini".
            ini_key = f"{key[:-4]}ini"
            try:
                value = self.db[ini_key]
                # NOTE(review): this caches even when opened readonly —
                # verify the underlying LMDBDict permits the write.
                self.db[key] = value
            except KeyError:
                return None
        return value

    def set(self, key: str, value: "ExpQueue") -> None:
        self.db[key] = value

    def sync(self) -> None:
        self.db.sync()

    def close(self) -> None:
        self.db.close()
def get_failing_together_db_path(granularity: str) -> str:
@ -670,15 +711,14 @@ def _read_and_update_past_failures(
for item in items:
full_key = key + item
is_new = full_key not in past_failures
cur = past_failures.get(full_key)
is_new = cur is None
if is_new:
if not is_regression:
continue
cur = ExpQueue(round(push_num / 100), int(HISTORICAL_TIMESPAN / 100) + 1, 0)
else:
cur = past_failures[full_key]
value = cur[round(push_num / 100)]
@ -690,7 +730,7 @@ def _read_and_update_past_failures(
if is_regression:
cur[round(push_num / 100)] = value + 1
if is_new:
past_failures[full_key] = cur
past_failures.set(full_key, cur)
return (
sum(values_total),
@ -702,7 +742,7 @@ def _read_and_update_past_failures(
def generate_data(
granularity: str,
past_failures: int,
past_failures: PastFailures,
commit: repository.CommitDict,
push_num: int,
runnables: Iterable[str],

Просмотреть файл

@ -260,15 +260,14 @@ def get_config_specific_groups(config: str) -> str:
equivalence_sets = testselect._get_equivalence_sets(0.9)
past_failures_data = test_scheduling.get_past_failures("group", True)
all_runnables = past_failures_data["all_runnables"]
past_failures_data = test_scheduling.PastFailures("group", True)
setkey(
job.result_key,
orjson.dumps(
[
{"name": group}
for group in all_runnables
for group in past_failures_data.all_runnables
if any(
equivalence_set == {config}
for equivalence_set in equivalence_sets[group]

Просмотреть файл

@ -320,9 +320,9 @@ def mock_get_config_specific_groups(
f.write("prova")
# Initialize a mock past failures DB.
past_failures_data = test_scheduling.get_past_failures("group", False)
past_failures_data["push_num"] = 1
past_failures_data["all_runnables"] = [
past_failures_data = test_scheduling.PastFailures("group", False)
past_failures_data.push_num = 1
past_failures_data.all_runnables = [
"test-group1",
"test-group2",
]
@ -375,9 +375,9 @@ def mock_schedule_tests_classify(
# Initialize a mock past failures DB.
for granularity in ("label", "group"):
past_failures_data = test_scheduling.get_past_failures(granularity, False)
past_failures_data["push_num"] = 1
past_failures_data["all_runnables"] = [
past_failures_data = test_scheduling.PastFailures(granularity, False)
past_failures_data.push_num = 1
past_failures_data.all_runnables = [
"test-linux1804-64-opt-label1",
"test-linux1804-64-opt-label2",
"test-group1",

Просмотреть файл

@ -210,7 +210,7 @@ class CommitClassifier(object):
test_scheduling.TEST_LABEL_SCHEDULING_DB,
test_scheduling.PAST_FAILURES_LABEL_DB,
)
self.past_failures_data = test_scheduling.get_past_failures("label", True)
self.past_failures_data = test_scheduling.PastFailures("label", True)
self.testfailure_model = cast(
TestFailureModel, TestFailureModel.load(download_model("testfailure"))

Просмотреть файл

@ -223,16 +223,19 @@ class Retriever(object):
)
def generate_all_data() -> Generator[dict[str, Any], None, None]:
past_failures = test_scheduling.get_past_failures(granularity, False)
past_failures = test_scheduling.PastFailures(granularity, False)
push_num = past_failures["push_num"] if "push_num" in past_failures else 0
try:
push_num = past_failures.push_num
except KeyError:
push_num = 0
commit_map = {}
for commit_data in tqdm(repository.get_commits()):
commit_map[commit_data["node"]] = commit_data
# Store all runnables in the past_failures DB so it can be used in the evaluation phase.
past_failures["all_runnables"] = all_runnables
past_failures.all_runnables = all_runnables
# XXX: Should we recreate the DB from scratch if the previous all_runnables are not the
# same as the current ones?
@ -334,7 +337,7 @@ class Retriever(object):
logger.info("skipped %d (too big commits)", skipped_too_big_commits)
logger.info("skipped %d (no interesting runnables)", skipped_no_runnables)
past_failures["push_num"] = push_num
past_failures.push_num = push_num
past_failures.close()
# For the config/group granularity, we are only interested in the failing together DB.

Просмотреть файл

@ -11,6 +11,7 @@ from _pytest.monkeypatch import MonkeyPatch
from bugbug import repository, test_scheduling
from bugbug.repository import CommitDict
from bugbug.test_scheduling import ConfigGroup, Group, Revision, Task
from bugbug.utils import ExpQueue
def test_rename_runnables() -> None:
@ -564,7 +565,7 @@ def test_touched_together_with_backout(monkeypatch: MonkeyPatch) -> None:
@pytest.mark.parametrize("granularity", ["group", "label"])
def test_generate_data(granularity: str) -> None:
past_failures = test_scheduling.get_past_failures(granularity, False)
past_failures = test_scheduling.PastFailures(granularity, False)
commits = [
CommitDict(
@ -1000,3 +1001,25 @@ def test_generate_data(granularity: str) -> None:
obj["touched_together_directories"] = 0
obj["touched_together_files"] = 0
assert data[1] == obj
def test_fallback_on_ini() -> None:
    """The INI history must be served (and cached) for a TOML homonym."""
    # Seed the DB with history for an INI manifest and a .list manifest.
    past_failures = test_scheduling.PastFailures("group", False)
    past_failures.set("browser.ini", ExpQueue(0, 1, 42))
    past_failures.set("reftest.list", ExpQueue(0, 1, 7))

    def check(manifest, expected):
        # The queue must exist and hold the expected value at slot 0.
        queue = past_failures.get(manifest)
        assert queue is not None
        assert queue[0] == expected

    # Direct hit on the INI key.
    check("browser.ini", 42)
    # The TOML key falls back on the homonymous INI data.
    check("browser.toml", 42)
    # Non-INI manifests are unaffected by the fallback...
    check("reftest.list", 7)
    # ...so their TOML homonym stays absent.
    assert past_failures.get("reftest.toml") is None
    # Unknown keys still miss.
    assert past_failures.get("unexisting.ini") is None

    # Explicit TOML data takes precedence over the INI fallback.
    past_failures.set("browser.toml", ExpQueue(0, 1, 22))
    check("browser.toml", 22)
    check("browser.ini", 42)

Просмотреть файл

@ -617,8 +617,8 @@ def test_all(g: Graph) -> None:
def test_select_configs(failing_together_config_group: LMDBDict) -> None:
past_failures_data = test_scheduling.get_past_failures("group", False)
past_failures_data["all_runnables"] = ["group1", "group2", "group3"]
past_failures_data = test_scheduling.PastFailures("group", False)
past_failures_data.all_runnables = ["group1", "group2", "group3"]
past_failures_data.close()
failing_together_config_group[b"group1"] = pickle.dumps(