Run method-level risk analysis

This commit is contained in:
Marco Castelluccio 2019-10-01 12:14:44 +02:00
Родитель 53f40dd8fb
Коммит 509ecc5368
2 изменённых файлов: 132 добавлений и 4 удалений

Просмотреть файл

@ -34,6 +34,10 @@
"public/importance.html": { "public/importance.html": {
"path": "/importance.html", "path": "/importance.html",
"type": "file" "type": "file"
},
"public/method_level.json": {
"path": "/method_level.json",
"type": "file"
} }
}, },
"cache": { "cache": {

Просмотреть файл

@ -1,20 +1,32 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import argparse import argparse
import csv
import io import io
import json import json
import os import os
import subprocess
import tempfile
from datetime import datetime
from logging import INFO, basicConfig, getLogger from logging import INFO, basicConfig, getLogger
import hglib import hglib
import joblib import joblib
import numpy as np import numpy as np
from dateutil.relativedelta import relativedelta
from libmozdata import vcs_map
from libmozdata.phabricator import PhabricatorAPI from libmozdata.phabricator import PhabricatorAPI
from scipy.stats import spearmanr from scipy.stats import spearmanr
from bugbug import db, repository from bugbug import db, repository
from bugbug.models.regressor import RegressorModel from bugbug.models.regressor import RegressorModel
from bugbug.utils import download_check_etag, get_secret, to_array, zstd_decompress from bugbug.utils import (
download_check_etag,
get_secret,
retry,
to_array,
zstd_decompress,
)
basicConfig(level=INFO) basicConfig(level=INFO)
logger = getLogger(__name__) logger = getLogger(__name__)
@ -23,7 +35,7 @@ URL = "https://index.taskcluster.net/v1/task/project.relman.bugbug.train_regress
class CommitClassifier(object): class CommitClassifier(object):
def __init__(self, cache_root): def __init__(self, cache_root, git_repo_dir, method_defect_predictor_dir):
self.cache_root = cache_root self.cache_root = cache_root
assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir." assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
@ -63,6 +75,38 @@ class CommitClassifier(object):
self.X = to_array(joblib.load(regressormodel_data_X_path)) self.X = to_array(joblib.load(regressormodel_data_X_path))
self.y = to_array(joblib.load(regressormodel_data_y_path)) self.y = to_array(joblib.load(regressormodel_data_y_path))
self.method_defect_predictor_dir = method_defect_predictor_dir
self.clone_git_repo(
"https://github.com/lucapascarella/MethodDefectPredictor",
method_defect_predictor_dir,
"6215de02517eb3484d6943ce1a1fb6c13b27475f",
)
self.git_repo_dir = git_repo_dir
self.clone_git_repo("https://github.com/mozilla/gecko-dev", git_repo_dir)
def clone_git_repo(self, repo_url, repo_dir, rev="master"):
logger.info(f"Cloning {repo_url}...")
if not os.path.exists(repo_dir):
retry(
lambda: subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
)
retry(
lambda: subprocess.run(
["git", "pull", repo_url, "master"],
cwd=repo_dir,
capture_output=True,
check=True,
)
)
retry(
lambda: subprocess.run(
["git", "checkout", rev], cwd=repo_dir, capture_output=True, check=True
)
)
def update_commit_db(self): def update_commit_db(self):
repository.clone(self.repo_dir) repository.clone(self.repo_dir)
@ -126,6 +170,17 @@ class CommitClassifier(object):
hg.update(rev=hg_base, clean=True) hg.update(rev=hg_base, clean=True)
logger.info(f"Updated repo to {hg_base}") logger.info(f"Updated repo to {hg_base}")
try:
self.git_base = vcs_map.mercurial_to_git(hg_base)
subprocess.run(
["git", "checkout", "-b", "analysis_branch", self.git_base],
check=True,
cwd=self.git_repo_dir,
)
logger.info(f"Updated git repo to {self.git_base}")
except Exception as e:
logger.info(f"Updating git repo to Mercurial {hg_base} failed: {e}")
for patch in needed_stack: for patch in needed_stack:
revision = revisions[patch.phid] revision = revisions[patch.phid]
@ -144,6 +199,20 @@ class CommitClassifier(object):
user="bugbug", user="bugbug",
) )
with tempfile.TemporaryDirectory() as tmpdirname:
temp_file = os.path.join(tmpdirname, "temp.patch")
with open(temp_file, "w") as f:
f.write(patch.patch)
subprocess.run(
["git", "apply", "--3way", temp_file],
check=True,
cwd=self.git_repo_dir,
)
subprocess.run(
["git", "commit", "-am", message], check=True, cwd=self.git_repo_dir
)
def classify(self, diff_id): def classify(self, diff_id):
self.update_commit_db() self.update_commit_db()
@ -247,17 +316,72 @@ class CommitClassifier(object):
with open("importance.html", "w") as f: with open("importance.html", "w") as f:
f.write(importance["html"]) f.write(importance["html"])
# Get commit hash from 4 months before the analysis time.
# The method-level analyzer needs 4 months of history.
four_months_ago = datetime.utcnow() - relativedelta(months=4)
p = subprocess.run(
[
"git",
"rev-list",
"-n",
"1",
"--until={}".format(four_months_ago.strftime("%Y-%m-%d")),
"HEAD",
],
check=True,
capture_output=True,
cwd=self.git_repo_dir,
)
stop_hash = p.stdout.decode().strip()
# Run the method-level analyzer.
subprocess.run(
[
"python3",
"tester.py",
"--repo",
self.git_repo_dir,
"--start",
"HEAD",
"--stop",
stop_hash,
"--output",
os.path.abspath("method_level.csv"),
],
check=True,
cwd=self.method_defect_predictor_dir,
)
method_level_results = []
with open("method_level.csv", "r") as f:
reader = csv.DictReader(f)
for item in reader:
method_level_results.append(item)
with open("method_level.json", "w") as f:
json.dump(method_level_results, f)
def main(): def main():
description = "Classify a commit" description = "Classify a commit"
parser = argparse.ArgumentParser(description=description) parser = argparse.ArgumentParser(description=description)
parser.add_argument("cache-root", help="Cache for repository clones.") parser.add_argument("cache_root", help="Cache for repository clones.")
parser.add_argument("diff_id", help="diff ID to analyze.", type=int) parser.add_argument("diff_id", help="diff ID to analyze.", type=int)
parser.add_argument(
"git_repo_dir", help="Path where the git repository will be cloned."
)
parser.add_argument(
"method_defect_predictor_dir",
help="Path where the git repository will be cloned.",
)
args = parser.parse_args() args = parser.parse_args()
classifier = CommitClassifier(getattr(args, "cache-root")) classifier = CommitClassifier(
args.cache_root, args.git_repo_dir, args.method_defect_predictor_dir
)
classifier.classify(args.diff_id) classifier.classify(args.diff_id)