diff --git a/infra/data-pipeline.yml b/infra/data-pipeline.yml index 7f45b053..9a0f2781 100644 --- a/infra/data-pipeline.yml +++ b/infra/data-pipeline.yml @@ -26,6 +26,10 @@ tasks: - /cache/ - https://github.com/marco-c/gecko-dev-wordified - --tokenize + artifacts: + public/gecko-dev-wordified.version: + path: /data/gecko-dev-wordified.version + type: file cache: bugbug-mercurial-repository: /cache features: @@ -61,6 +65,10 @@ tasks: - /cache/ - https://github.com/marco-c/gecko-dev-comments-removed - --remove-comments + artifacts: + public/gecko-dev-comments-removed.version: + path: /data/gecko-dev-comments-removed.version + type: file cache: bugbug-mercurial-repository: /cache features: @@ -97,6 +105,10 @@ tasks: - https://github.com/marco-c/gecko-dev-wordified-and-comments-removed - --tokenize - --remove-comments + artifacts: + public/gecko-dev-wordified-and-comments-removed.version: + path: /data/gecko-dev-wordified-and-comments-removed.version + type: file cache: bugbug-mercurial-repository: /cache features: diff --git a/scripts/microannotate_generator.py b/scripts/microannotate_generator.py index 0e467a6f..4079e8ff 100644 --- a/scripts/microannotate_generator.py +++ b/scripts/microannotate_generator.py @@ -8,13 +8,17 @@ from logging import INFO, basicConfig, getLogger from microannotate import generator -from bugbug import repository +from bugbug import db, repository from bugbug.utils import get_secret, retry basicConfig(level=INFO) logger = getLogger(__name__) +# When updating the version, the git repositories will be recreated from scratch. +# This is useful when new meaningful versions of rust-code-analysis or microannotate +# are used. +VERSION = 1 COMMITS_STEP = 5000 @@ -30,6 +34,12 @@ class MicroannotateGenerator(object): self.repo_dir = os.path.join(cache_root, "mozilla-central") def generate(self): + db.register( + self.git_repo_path, + f"https://community-tc.services.mozilla.com/api/index/v1/task/project.relman.bugbug.microannotate_{self.git_repo_path}.latest/artifacts/public/", + VERSION, + ) + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: cloner = executor.submit(repository.clone, self.repo_dir) cloner.add_done_callback(