зеркало из https://github.com/mozilla/bugbug.git
Run method-level risk analysis
This commit is contained in:
Родитель
53f40dd8fb
Коммит
509ecc5368
|
@ -34,6 +34,10 @@
|
||||||
"public/importance.html": {
|
"public/importance.html": {
|
||||||
"path": "/importance.html",
|
"path": "/importance.html",
|
||||||
"type": "file"
|
"type": "file"
|
||||||
|
},
|
||||||
|
"public/method_level.json": {
|
||||||
|
"path": "/method_level.json",
|
||||||
|
"type": "file"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"cache": {
|
"cache": {
|
||||||
|
|
|
@ -1,20 +1,32 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import csv
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
from datetime import datetime
|
||||||
from logging import INFO, basicConfig, getLogger
|
from logging import INFO, basicConfig, getLogger
|
||||||
|
|
||||||
import hglib
|
import hglib
|
||||||
import joblib
|
import joblib
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from dateutil.relativedelta import relativedelta
|
||||||
|
from libmozdata import vcs_map
|
||||||
from libmozdata.phabricator import PhabricatorAPI
|
from libmozdata.phabricator import PhabricatorAPI
|
||||||
from scipy.stats import spearmanr
|
from scipy.stats import spearmanr
|
||||||
|
|
||||||
from bugbug import db, repository
|
from bugbug import db, repository
|
||||||
from bugbug.models.regressor import RegressorModel
|
from bugbug.models.regressor import RegressorModel
|
||||||
from bugbug.utils import download_check_etag, get_secret, to_array, zstd_decompress
|
from bugbug.utils import (
|
||||||
|
download_check_etag,
|
||||||
|
get_secret,
|
||||||
|
retry,
|
||||||
|
to_array,
|
||||||
|
zstd_decompress,
|
||||||
|
)
|
||||||
|
|
||||||
basicConfig(level=INFO)
|
basicConfig(level=INFO)
|
||||||
logger = getLogger(__name__)
|
logger = getLogger(__name__)
|
||||||
|
@ -23,7 +35,7 @@ URL = "https://index.taskcluster.net/v1/task/project.relman.bugbug.train_regress
|
||||||
|
|
||||||
|
|
||||||
class CommitClassifier(object):
|
class CommitClassifier(object):
|
||||||
def __init__(self, cache_root):
|
def __init__(self, cache_root, git_repo_dir, method_defect_predictor_dir):
|
||||||
self.cache_root = cache_root
|
self.cache_root = cache_root
|
||||||
|
|
||||||
assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
|
assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
|
||||||
|
@ -63,6 +75,38 @@ class CommitClassifier(object):
|
||||||
self.X = to_array(joblib.load(regressormodel_data_X_path))
|
self.X = to_array(joblib.load(regressormodel_data_X_path))
|
||||||
self.y = to_array(joblib.load(regressormodel_data_y_path))
|
self.y = to_array(joblib.load(regressormodel_data_y_path))
|
||||||
|
|
||||||
|
self.method_defect_predictor_dir = method_defect_predictor_dir
|
||||||
|
self.clone_git_repo(
|
||||||
|
"https://github.com/lucapascarella/MethodDefectPredictor",
|
||||||
|
method_defect_predictor_dir,
|
||||||
|
"6215de02517eb3484d6943ce1a1fb6c13b27475f",
|
||||||
|
)
|
||||||
|
self.git_repo_dir = git_repo_dir
|
||||||
|
self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)
|
||||||
|
|
||||||
|
def clone_git_repo(self, repo_url, repo_dir, rev="master"):
    """Bring a git checkout of ``repo_url`` in ``repo_dir`` to ``rev``.

    The repository is cloned only when ``repo_dir`` does not exist yet.
    In every case ``master`` is then pulled from ``repo_url`` and ``rev``
    is checked out.  Each git invocation goes through ``retry``.

    Args:
        repo_url: URL of the remote git repository.
        repo_dir: Local directory holding (or receiving) the clone.
        rev: Revision to check out once the pull is done.
    """
    logger.info(f"Cloning {repo_url}...")

    def do_clone():
        return subprocess.run(["git", "clone", repo_url, repo_dir], check=True)

    def do_pull():
        return subprocess.run(
            ["git", "pull", repo_url, "master"],
            cwd=repo_dir,
            capture_output=True,
            check=True,
        )

    def do_checkout():
        return subprocess.run(
            ["git", "checkout", rev],
            cwd=repo_dir,
            capture_output=True,
            check=True,
        )

    already_present = os.path.exists(repo_dir)
    if not already_present:
        retry(do_clone)

    # Pull and checkout run inside the clone with their output captured.
    for git_step in (do_pull, do_checkout):
        retry(git_step)
|
||||||
|
|
||||||
def update_commit_db(self):
|
def update_commit_db(self):
|
||||||
repository.clone(self.repo_dir)
|
repository.clone(self.repo_dir)
|
||||||
|
|
||||||
|
@ -126,6 +170,17 @@ class CommitClassifier(object):
|
||||||
hg.update(rev=hg_base, clean=True)
|
hg.update(rev=hg_base, clean=True)
|
||||||
logger.info(f"Updated repo to {hg_base}")
|
logger.info(f"Updated repo to {hg_base}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.git_base = vcs_map.mercurial_to_git(hg_base)
|
||||||
|
subprocess.run(
|
||||||
|
["git", "checkout", "-b", "analysis_branch", self.git_base],
|
||||||
|
check=True,
|
||||||
|
cwd=self.git_repo_dir,
|
||||||
|
)
|
||||||
|
logger.info(f"Updated git repo to {self.git_base}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")
|
||||||
|
|
||||||
for patch in needed_stack:
|
for patch in needed_stack:
|
||||||
revision = revisions[patch.phid]
|
revision = revisions[patch.phid]
|
||||||
|
|
||||||
|
@ -144,6 +199,20 @@ class CommitClassifier(object):
|
||||||
user="bugbug",
|
user="bugbug",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||||
|
temp_file = os.path.join(tmpdirname, "temp.patch")
|
||||||
|
with open(temp_file, "w") as f:
|
||||||
|
f.write(patch.patch)
|
||||||
|
|
||||||
|
subprocess.run(
|
||||||
|
["git", "apply", "--3way", temp_file],
|
||||||
|
check=True,
|
||||||
|
cwd=self.git_repo_dir,
|
||||||
|
)
|
||||||
|
subprocess.run(
|
||||||
|
["git", "commit", "-am", message], check=True, cwd=self.git_repo_dir
|
||||||
|
)
|
||||||
|
|
||||||
def classify(self, diff_id):
|
def classify(self, diff_id):
|
||||||
self.update_commit_db()
|
self.update_commit_db()
|
||||||
|
|
||||||
|
@ -247,17 +316,72 @@ class CommitClassifier(object):
|
||||||
with open("importance.html", "w") as f:
|
with open("importance.html", "w") as f:
|
||||||
f.write(importance["html"])
|
f.write(importance["html"])
|
||||||
|
|
||||||
|
# Get commit hash from 4 months before the analysis time.
|
||||||
|
# The method-level analyzer needs 4 months of history.
|
||||||
|
four_months_ago = datetime.utcnow() - relativedelta(months=4)
|
||||||
|
p = subprocess.run(
|
||||||
|
[
|
||||||
|
"git",
|
||||||
|
"rev-list",
|
||||||
|
"-n",
|
||||||
|
"1",
|
||||||
|
"--until={}".format(four_months_ago.strftime("%Y-%m-%d")),
|
||||||
|
"HEAD",
|
||||||
|
],
|
||||||
|
check=True,
|
||||||
|
capture_output=True,
|
||||||
|
cwd=self.git_repo_dir,
|
||||||
|
)
|
||||||
|
|
||||||
|
stop_hash = p.stdout.decode().strip()
|
||||||
|
|
||||||
|
# Run the method-level analyzer.
|
||||||
|
subprocess.run(
|
||||||
|
[
|
||||||
|
"python3",
|
||||||
|
"tester.py",
|
||||||
|
"--repo",
|
||||||
|
self.git_repo_dir,
|
||||||
|
"--start",
|
||||||
|
"HEAD",
|
||||||
|
"--stop",
|
||||||
|
stop_hash,
|
||||||
|
"--output",
|
||||||
|
os.path.abspath("method_level.csv"),
|
||||||
|
],
|
||||||
|
check=True,
|
||||||
|
cwd=self.method_defect_predictor_dir,
|
||||||
|
)
|
||||||
|
|
||||||
|
method_level_results = []
|
||||||
|
with open("method_level.csv", "r") as f:
|
||||||
|
reader = csv.DictReader(f)
|
||||||
|
for item in reader:
|
||||||
|
method_level_results.append(item)
|
||||||
|
|
||||||
|
with open("method_level.json", "w") as f:
|
||||||
|
json.dump(method_level_results, f)
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Parse the command line and classify the requested diff.

    Positional arguments, in order: ``cache_root``, ``diff_id`` (int),
    ``git_repo_dir`` and ``method_defect_predictor_dir``.  The three paths
    are passed to ``CommitClassifier``; the diff ID is passed to its
    ``classify`` method.
    """
    description = "Classify a commit"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument("cache_root", help="Cache for repository clones.")
    parser.add_argument("diff_id", help="diff ID to analyze.", type=int)
    parser.add_argument(
        "git_repo_dir", help="Path where the git repository will be cloned."
    )
    # This help text used to duplicate the git_repo_dir one, which made the
    # two positional paths indistinguishable in --help output.
    parser.add_argument(
        "method_defect_predictor_dir",
        help="Path where the MethodDefectPredictor repository will be cloned.",
    )

    args = parser.parse_args()

    classifier = CommitClassifier(
        args.cache_root, args.git_repo_dir, args.method_defect_predictor_dir
    )
    classifier.classify(args.diff_id)
|
||||||
|
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче