From 936f2ccb3c6440875009c7f8f31825c7a772fe1d Mon Sep 17 00:00:00 2001 From: Marco Castelluccio Date: Wed, 5 Feb 2020 00:09:26 +0100 Subject: [PATCH] Make it possible to run the bugbug-commit-classify script with either a Phabricator diff or a revision --- infra/taskcluster-hook-classify-patch.json | 4 +- infra/taskcluster-hook-test-select.json | 4 +- scripts/commit_classifier.py | 247 +++++++++++---------- 3 files changed, 138 insertions(+), 117 deletions(-) diff --git a/infra/taskcluster-hook-classify-patch.json b/infra/taskcluster-hook-classify-patch.json index 7c9f8157..8992da18 100644 --- a/infra/taskcluster-hook-classify-patch.json +++ b/infra/taskcluster-hook-classify-patch.json @@ -50,8 +50,8 @@ "bugbug-classify-commit", "regressor", "/cache/mozilla-central", - "${payload['DIFF_ID']}", - "${payload['PHABRICATOR_DEPLOYMENT']}", + "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}", + "--diff-id=${payload['DIFF_ID']}", "--git_repo_dir=/gecko-dev", "--method_defect_predictor_dir=/MethodDefectPredictor" ], diff --git a/infra/taskcluster-hook-test-select.json b/infra/taskcluster-hook-test-select.json index 685ca85a..a27bfd1f 100644 --- a/infra/taskcluster-hook-test-select.json +++ b/infra/taskcluster-hook-test-select.json @@ -49,8 +49,8 @@ "bugbug-classify-commit", "testselect", "/cache/mozilla-central", - "${payload['DIFF_ID']}", - "${payload['PHABRICATOR_DEPLOYMENT']}", + "--phabricator-deployment=${payload['PHABRICATOR_DEPLOYMENT']}", + "--diff-id=${payload['DIFF_ID']}", "--runnable-jobs=${payload['RUNNABLE_JOBS']}" ], "image": "mozilla/bugbug-commit-retrieval", diff --git a/scripts/commit_classifier.py b/scripts/commit_classifier.py index ea8ec581..d081baf5 100644 --- a/scripts/commit_classifier.py +++ b/scripts/commit_classifier.py @@ -222,16 +222,16 @@ class CommitClassifier(object): repository.download_commits(self.repo_dir, rev_start) - def apply_phab(self, hg, phabricator_deployment, diff_id): - def has_revision(revision): - if not revision: - return False - try: - hg.identify(revision) - return True - except hglib.error.CommandError: - return False + def has_revision(self, hg, revision): + if not revision: + return False + try: + hg.identify(revision) + return True + except hglib.error.CommandError: + return False + def apply_phab(self, hg, phabricator_deployment, diff_id): if phabricator_deployment == PHAB_PROD: api_key = get_secret("PHABRICATOR_TOKEN") url = get_secret("PHABRICATOR_URL") @@ -252,7 +252,7 @@ class CommitClassifier(object): needed_stack.insert(0, patch) # Stop as soon as a base revision is available - if has_revision(patch.base_revision): + if self.has_revision(hg, patch.base_revision): logger.info( f"Stopping at diff {patch.id} and revision {patch.base_revision}" ) @@ -273,7 +273,7 @@ class CommitClassifier(object): # Update repo to base revision hg_base = needed_stack[0].base_revision - if not has_revision(hg_base): + if not self.has_revision(hg, hg_base): logger.warning("Missing base revision {} from Phabricator".format(hg_base)) hg_base = "tip" @@ -545,129 +545,147 @@ class CommitClassifier(object): with open("importances.json", "w") as f: json.dump(features, f) - def classify(self, phabricator_deployment, diff_id, runnable_jobs_path): + def classify( + self, + revision=None, + phabricator_deployment=None, + diff_id=None, + runnable_jobs_path=None, + ): + if revision is not None: + assert phabricator_deployment is None + assert diff_id is None + + if diff_id is not None: + assert phabricator_deployment is not None + assert revision is None + self.update_commit_db() with hglib.open(self.repo_dir) as hg: - self.apply_phab(hg, phabricator_deployment, diff_id) + if phabricator_deployment is not None and diff_id is not None: + self.apply_phab(hg, phabricator_deployment, diff_id) - patch_rev = hg.log(revrange="not public()")[0].node + revision = hg.log(revrange="not public()")[0].node.decode("utf-8") # Analyze patch. commits = repository.download_commits( - self.repo_dir, rev_start=patch_rev.decode("utf-8"), save=False + self.repo_dir, rev_start=revision, save=False ) + if not self.use_test_history: + self.classify_regressor(commits) + else: + self.classify_test_select(commits, runnable_jobs_path) + + def classify_regressor(self, commits): # We use "clean" (or "dirty") commits as the background dataset for feature importance. # This way, we can see the features which are most important in differentiating # the current commit from the "clean" (or "dirty") commits. + probs, importance = self.model.classify( + commits[-1], + probabilities=True, + importances=True, + background_dataset=lambda v: self.X[self.y != v], + importance_cutoff=0.05, + ) - if not self.use_test_history: - probs, importance = self.model.classify( - commits[-1], - probabilities=True, - importances=True, - background_dataset=lambda v: self.X[self.y != v], - importance_cutoff=0.05, - ) + self.generate_feature_importance_data(probs, importance) - self.generate_feature_importance_data(probs, importance) + with open("probs.json", "w") as f: + json.dump(probs[0].tolist(), f) - with open("probs.json", "w") as f: - json.dump(probs[0].tolist(), f) + if self.model_name == "regressor" and self.method_defect_predictor_dir: + self.classify_methods(commits[-1]) - if self.model_name == "regressor" and self.method_defect_predictor_dir: - self.classify_methods(commits[-1]) + def classify_test_select(self, commits, runnable_jobs_path): + testfailure_probs = self.testfailure_model.classify( + commits[-1], probabilities=True + ) + + logger.info(f"Test failure risk: {testfailure_probs[0][1]}") + + commit_data = commit_features.merge_commits(commits) + + push_num = self.past_failures_data["push_num"] + + # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES. + all_tasks = self.past_failures_data["all_tasks"] + + if not runnable_jobs_path: + runnable_jobs = {task for task in all_tasks} + elif runnable_jobs_path.startswith("http"): + r = requests.get(runnable_jobs_path) + r.raise_for_status() + runnable_jobs = r.json() else: - testfailure_probs = self.testfailure_model.classify( - commits[-1], probabilities=True + with open(runnable_jobs_path, "r") as f: + runnable_jobs = json.load(f) + + # XXX: For now, only restrict to linux64 test tasks. + all_tasks = [ + t + for t in all_tasks + if t.startswith("test-linux1804-64/") and "test-verify" not in t + ] + + # XXX: Remove tasks which are not in runnable jobs right away, so we avoid classifying them. + + commit_tests = [] + for data in test_scheduling.generate_data( + self.past_failures_data, commit_data, push_num, all_tasks, [], [] + ): + if not data["name"].startswith("test-"): + continue + + commit_test = commit_data.copy() + commit_test["test_job"] = data + commit_tests.append(commit_test) + + probs = self.model.classify(commit_tests, probabilities=True) + selected_indexes = np.argwhere( + probs[:, 1] > float(get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD")) + )[:, 0] + selected_tasks = [commit_tests[i]["test_job"]["name"] for i in selected_indexes] + + with open("failure_risk", "w") as f: + f.write( + "1" + if testfailure_probs[0][1] + > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD")) + else "0" ) - logger.info(f"Test failure risk: {testfailure_probs[0][1]}") - - commit_data = commit_features.merge_commits(commits) - - push_num = self.past_failures_data["push_num"] - - # XXX: Consider using mozilla-central built-in rules to filter some of these out, e.g. SCHEDULES. - all_tasks = self.past_failures_data["all_tasks"] - - if not runnable_jobs_path: - runnable_jobs = {task for task in all_tasks} - elif runnable_jobs_path.startswith("http"): - r = requests.get(runnable_jobs_path) - r.raise_for_status() - runnable_jobs = r.json() - else: - with open(runnable_jobs_path, "r") as f: - runnable_jobs = json.load(f) - - # XXX: For now, only restrict to linux64 test tasks. - all_tasks = [ - t - for t in all_tasks - if t.startswith("test-linux1804-64/") and "test-verify" not in t - ] - - # XXX: Remove tasks which are not in runnable jobs right away, so we avoid classifying them. - - commit_tests = [] - for data in test_scheduling.generate_data( - self.past_failures_data, commit_data, push_num, all_tasks, [], [] + # This should be kept in sync with the test scheduling history retriever script. + cleaned_selected_tasks = [] + for selected_task in selected_tasks: + if ( + selected_task.startswith("test-linux64") + and selected_task not in runnable_jobs ): - if not data["name"].startswith("test-"): - continue - - commit_test = commit_data.copy() - commit_test["test_job"] = data - commit_tests.append(commit_test) - - probs = self.model.classify(commit_tests, probabilities=True) - selected_indexes = np.argwhere( - probs[:, 1] > float(get_secret("TEST_SELECTION_CONFIDENCE_THRESHOLD")) - )[:, 0] - selected_tasks = [ - commit_tests[i]["test_job"]["name"] for i in selected_indexes - ] - - with open("failure_risk", "w") as f: - f.write( - "1" - if testfailure_probs[0][1] - > float(get_secret("TEST_FAILURE_CONFIDENCE_THRESHOLD")) - else "0" + selected_task = selected_task.replace( + "test-linux64-", "test-linux1804-64-" ) - # This should be kept in sync with the test scheduling history retriever script. + if ( + selected_task.startswith("test-linux1804-64-") + and selected_task not in runnable_jobs + ): + selected_task = selected_task.replace( + "test-linux1804-64-", "test-linux64-" + ) + + if selected_task in runnable_jobs: + cleaned_selected_tasks.append(selected_task) + + # It isn't worth running the build associated to the tests, if we only run three test tasks. + if len(cleaned_selected_tasks) < 3: cleaned_selected_tasks = [] - for selected_task in selected_tasks: - if ( - selected_task.startswith("test-linux64") - and selected_task not in runnable_jobs - ): - selected_task = selected_task.replace( - "test-linux64-", "test-linux1804-64-" - ) - if ( - selected_task.startswith("test-linux1804-64-") - and selected_task not in runnable_jobs - ): - selected_task = selected_task.replace( - "test-linux1804-64-", "test-linux64-" - ) - - if selected_task in runnable_jobs: - cleaned_selected_tasks.append(selected_task) - - # It isn't worth running the build associated to the tests, if we only run three test tasks. - if len(cleaned_selected_tasks) < 3: - cleaned_selected_tasks = [] - - with open("selected_tasks", "w") as f: - f.writelines( - f"{selected_task}\n" for selected_task in cleaned_selected_tasks - ) + with open("selected_tasks", "w") as f: + f.writelines( + f"{selected_task}\n" for selected_task in cleaned_selected_tasks + ) def classify_methods(self, commit): # Get commit hash from 4 months before the analysis time. @@ -748,13 +766,14 @@ def main(): "repo_dir", help="Path to a Gecko repository. If no repository exists, it will be cloned to this location.", ) - parser.add_argument("diff_id", help="diff ID to analyze.", type=int) parser.add_argument( - "phabricator_deployment", + "--phabricator-deployment", help="Which Phabricator deployment to hit.", type=str, choices=[PHAB_PROD, PHAB_DEV], ) + parser.add_argument("--diff-id", help="diff ID to analyze.", type=int) + parser.add_argument("--revision", help="revision to analyze.", type=str) parser.add_argument( "--runnable-jobs", help="Path or URL to a file containing runnable jobs.", @@ -773,7 +792,9 @@ def main(): classifier = CommitClassifier( args.model, args.repo_dir, args.git_repo_dir, args.method_defect_predictor_dir ) - classifier.classify(args.phabricator_deployment, args.diff_id, args.runnable_jobs) + classifier.classify( + args.revision, args.phabricator_deployment, args.diff_id, args.runnable_jobs + ) if __name__ == "__main__":