# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse
import concurrent.futures
import os
import subprocess
import threading
from collections import defaultdict
from datetime import datetime
from logging import INFO, basicConfig, getLogger

import dateutil.parser
import hglib
from dateutil.relativedelta import relativedelta
from libmozdata import vcs_map
from microannotate import utils as microannotate_utils
from pydriller import GitRepository
from tqdm import tqdm

from bugbug import bugzilla, db, repository
from bugbug.models.defect_enhancement_task import DefectEnhancementTaskModel
from bugbug.models.regression import RegressionModel
from bugbug.utils import download_check_etag, retry, zstd_compress, zstd_decompress

basicConfig(level=INFO)
logger = getLogger(__name__)

thread_local = threading.local()

MAX_MODIFICATION_NUMBER = 50
RELATIVE_START_DATE = relativedelta(years=2, months=6)
# Only needed because the mercurial <-> git mapping could lag behind.
RELATIVE_END_DATE = relativedelta(days=7)

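# db.register(path, url, version) ties each local JSON database to the URL of
# its compressed snapshot; the trailing 1 is presumably a schema version, which
# db.is_old_version() checks below to decide whether to force a re-download.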
IGNORED_COMMITS_DB = "data/ignored_commits.json"
db.register(
    IGNORED_COMMITS_DB,
    "https://index.taskcluster.net/v1/task/project.relman.bugbug_annotate.regressor_finder.latest/artifacts/public/ignored_commits.json.zst",
    1,
)

BUG_FIXING_COMMITS_DB = "data/bug_fixing_commits.json"
db.register(
    BUG_FIXING_COMMITS_DB,
    "https://index.taskcluster.net/v1/task/project.relman.bugbug_annotate.regressor_finder.latest/artifacts/public/bug_fixing_commits.json.zst",
    1,
)

BUG_INTRODUCING_COMMITS_DB = "data/bug_introducing_commits.json"
db.register(
    BUG_INTRODUCING_COMMITS_DB,
    "https://index.taskcluster.net/v1/task/project.relman.bugbug_annotate.regressor_finder.latest/artifacts/public/bug_introducing_commits.json.zst",
    1,
)

TOKENIZED_BUG_INTRODUCING_COMMITS_DB = "data/tokenized_bug_introducing_commits.json"
db.register(
    TOKENIZED_BUG_INTRODUCING_COMMITS_DB,
    "https://index.taskcluster.net/v1/task/project.relman.bugbug_annotate.regressor_finder.latest/artifacts/public/tokenized_bug_introducing_commits.json.zst",
    1,
)


BASE_URL = "https://index.taskcluster.net/v1/task/project.relman.bugbug.train_{model_name}.latest/artifacts/public/{model_name}model.zst"


def download_model(model_name):
    if not os.path.exists(f"{model_name}model"):
        url = BASE_URL.format(model_name=model_name)
        logger.info(f"Downloading {url}...")
        download_check_etag(url, f"{model_name}model.zst")
        zstd_decompress(f"{model_name}model")
        assert os.path.exists(f"{model_name}model"), "Decompressed file doesn't exist"
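
# Typical usage, as in find_bug_fixing_commits() below:
#   download_model("regression")
#   regression_model = RegressionModel.load("regressionmodel")

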
class RegressorFinder(object):
    def __init__(
        self,
        cache_root,
        git_repo_url,
        git_repo_dir,
        tokenized_git_repo_url,
        tokenized_git_repo_dir,
    ):
        self.mercurial_repo_dir = os.path.join(cache_root, "mozilla-central")
        self.git_repo_url = git_repo_url
        self.git_repo_dir = git_repo_dir
        self.tokenized_git_repo_url = tokenized_git_repo_url
        self.tokenized_git_repo_dir = tokenized_git_repo_dir

        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            logger.info(f"Cloning mercurial repository to {self.mercurial_repo_dir}...")
            executor.submit(repository.clone, self.mercurial_repo_dir)

            logger.info(f"Cloning {self.git_repo_url} to {self.git_repo_dir}...")
            executor.submit(self.clone_git_repo, self.git_repo_url, self.git_repo_dir)

            logger.info(
                f"Cloning {self.tokenized_git_repo_url} to {self.tokenized_git_repo_dir}..."
            )
            executor.submit(
                self.clone_git_repo,
                self.tokenized_git_repo_url,
                self.tokenized_git_repo_dir,
            )

        logger.info("Initializing mapping between git and mercurial commits...")
        self.init_mapping()
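
    # Clone the repository if it is missing, then pull the latest master; both
    # operations are retried, presumably to tolerate transient network failures.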
    def clone_git_repo(self, repo_url, repo_dir):
        if not os.path.exists(repo_dir):
            retry(
                lambda: subprocess.run(["git", "clone", repo_url, repo_dir], check=True)
            )

        retry(
            lambda: subprocess.run(
                ["git", "pull", repo_url, "master"],
                cwd=repo_dir,
                capture_output=True,
                check=True,
            )
        )

    def init_mapping(self):
        logger.info("Downloading Mercurial <-> git mapping file...")
        vcs_map.download_mapfile()

        self.tokenized_git_to_mercurial, self.mercurial_to_tokenized_git = microannotate_utils.get_commit_mapping(
            self.tokenized_git_repo_dir
        )
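
    # Returns the full list (previously stored + newly found) of commits to
    # exclude from SZZ blame: commits the repository module marks as ignorable,
    # plus commits that were later backed out.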
    # TODO: Make the repository module analyze all commits, even those to
    # ignore, but add an "ignore" field or a should_ignore function that
    # analyzes the commit data. That way we wouldn't have to clone the Mercurial
    # repository in this script.
    def get_commits_to_ignore(self):
        logger.info("Downloading previous commits to ignore...")
        if db.is_old_version(IGNORED_COMMITS_DB) or not db.exists(IGNORED_COMMITS_DB):
            db.download(IGNORED_COMMITS_DB, force=True)

        logger.info("Getting previously classified commits...")
        prev_commits_to_ignore = list(db.read(IGNORED_COMMITS_DB))
        logger.info(f"Already found {len(prev_commits_to_ignore)} commits to ignore...")

        if len(prev_commits_to_ignore) > 0:
            rev_start = "children({})".format(prev_commits_to_ignore[-1]["rev"])
        else:
            rev_start = 0

        # Go 2 days past the end date, so we can tell whether a commit was
        # backed out. We have to do this because recent commits might be missing
        # from the mercurial <-> git map; otherwise we could just use "tip".
        end_date = datetime.now() - RELATIVE_END_DATE + relativedelta(days=2)
        with hglib.open(self.mercurial_repo_dir) as hg:
            revs = repository.get_revs(
                hg, rev_start, "pushdate('{}')".format(end_date.strftime("%Y-%m-%d"))
            )

        # Given that we use the pushdate, the starting commit might be returned
        # too (e.g. if we rerun the task on the same day).
        if len(prev_commits_to_ignore) > 0:
            found_prev = -1
            for i, rev in enumerate(revs):
                if rev.decode("utf-8") == prev_commits_to_ignore[-1]["rev"]:
                    found_prev = i
                    break
            revs = revs[found_prev + 1 :]

        commits = repository.hg_log_multi(self.mercurial_repo_dir, revs)

        repository.set_commits_to_ignore(self.mercurial_repo_dir, commits)
        commits_to_ignore = []

        for commit in commits:
            if commit.ignored or commit.backedoutby:
                commits_to_ignore.append(
                    {
                        "rev": commit.node,
                        "type": "backedout" if commit.backedoutby else "",
                    }
                )

        logger.info(f"{len(commits_to_ignore)} new commits to ignore...")

        logger.info(
            "...of which {} are backed-out".format(
                sum(1 for commit in commits_to_ignore if commit["type"] == "backedout")
            )
        )

        db.append(IGNORED_COMMITS_DB, commits_to_ignore)
        zstd_compress(IGNORED_COMMITS_DB)

        return prev_commits_to_ignore + commits_to_ignore
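
    # Classify each recent commit's bug as a regression fix ("r"), a defect fix
    # ("d") or an enhancement/task ("e"), reusing known Bugzilla labels where
    # available and falling back to the models otherwise; only "r" and "d"
    # commits are returned as bug-fixing.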
    def find_bug_fixing_commits(self):
        logger.info("Downloading commits database...")
        if db.is_old_version(repository.COMMITS_DB) or not db.exists(
            repository.COMMITS_DB
        ):
            db.download(repository.COMMITS_DB, force=True)

        logger.info("Downloading bugs database...")
        if db.is_old_version(bugzilla.BUGS_DB) or not db.exists(bugzilla.BUGS_DB):
            db.download(bugzilla.BUGS_DB, force=True)

        logger.info("Downloading previous classifications...")
        if db.is_old_version(BUG_FIXING_COMMITS_DB) or not db.exists(
            BUG_FIXING_COMMITS_DB
        ):
            db.download(BUG_FIXING_COMMITS_DB, force=True)

        logger.info("Getting previously classified commits...")
        prev_bug_fixing_commits = list(db.read(BUG_FIXING_COMMITS_DB))
        prev_bug_fixing_commits_nodes = set(
            bug_fixing_commit["rev"] for bug_fixing_commit in prev_bug_fixing_commits
        )
        logger.info(f"Already classified {len(prev_bug_fixing_commits)} commits...")

        # TODO: Switch to the pure Defect model, as it's better in this case.
        logger.info("Downloading defect/enhancement/task model...")
        download_model("defectenhancementtask")
        defect_model = DefectEnhancementTaskModel.load("defectenhancementtaskmodel")

        logger.info("Downloading regression model...")
        download_model("regression")
        regression_model = RegressionModel.load("regressionmodel")

        start_date = datetime.now() - RELATIVE_START_DATE
        end_date = datetime.now() - RELATIVE_END_DATE
        logger.info(
            f"Gathering bug IDs associated with commits (from {start_date} up to {end_date})..."
        )
        commit_map = defaultdict(list)
        for commit in repository.get_commits():
            if commit["node"] in prev_bug_fixing_commits_nodes:
                continue

            commit_date = dateutil.parser.parse(commit["pushdate"])
            if commit_date < start_date or commit_date > end_date:
                continue

            commit_map[commit["bug_id"]].append(commit["node"])

        logger.info(
            f"{sum(len(commit_list) for commit_list in commit_map.values())} commits found, {len(commit_map)} bugs linked to commits"
        )
        assert len(commit_map) > 0

        def get_relevant_bugs():
            return (bug for bug in bugzilla.get_bugs() if bug["id"] in commit_map)

        bug_count = sum(1 for bug in get_relevant_bugs())
        logger.info(
            f"{bug_count} bugs in total, {len(commit_map) - bug_count} bugs linked to commits are missing"
        )

        known_defect_labels = defect_model.get_labels()
        known_regression_labels = regression_model.get_labels()

        bug_fixing_commits = []

        def append_bug_fixing_commits(bug_id, type_):
            for commit in commit_map[bug_id]:
                bug_fixing_commits.append({"rev": commit, "type": type_})

        for bug in tqdm(get_relevant_bugs(), total=bug_count):
            # Ignore bugs which are not linked to the commits we care about.
            if bug["id"] not in commit_map:
                continue

            # If we know the label already, we don't need to apply the model.
            if (
                bug["id"] in known_regression_labels
                and known_regression_labels[bug["id"]] == 1
            ):
                append_bug_fixing_commits(bug["id"], "r")
                continue

            if bug["id"] in known_defect_labels:
                if known_defect_labels[bug["id"]] == "defect":
                    append_bug_fixing_commits(bug["id"], "d")
                else:
                    append_bug_fixing_commits(bug["id"], "e")
                continue

            if defect_model.classify(bug)[0] == "defect":
                if regression_model.classify(bug)[0] == 1:
                    append_bug_fixing_commits(bug["id"], "r")
                else:
                    append_bug_fixing_commits(bug["id"], "d")
            else:
                append_bug_fixing_commits(bug["id"], "e")

        db.append(BUG_FIXING_COMMITS_DB, bug_fixing_commits)
        zstd_compress(BUG_FIXING_COMMITS_DB)

        bug_fixing_commits = prev_bug_fixing_commits + bug_fixing_commits
        return [
            bug_fixing_commit
            for bug_fixing_commit in bug_fixing_commits
            if bug_fixing_commit["type"] in ["r", "d"]
        ]
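
    # SZZ via pydriller: get_commits_last_modified_lines() blames the lines each
    # fix touched (skipping hashes listed in git_hashes_to_ignore) and returns,
    # per modified file, the commits that last touched those lines; those become
    # the bug-introducing candidates recorded below.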
    def find_bug_introducing_commits(
        self, bug_fixing_commits, commits_to_ignore, tokenized
    ):
        if tokenized:
            db_path = TOKENIZED_BUG_INTRODUCING_COMMITS_DB
            repo_dir = self.tokenized_git_repo_dir
        else:
            db_path = BUG_INTRODUCING_COMMITS_DB
            repo_dir = self.git_repo_dir

        def git_to_mercurial(rev):
            if tokenized:
                return self.tokenized_git_to_mercurial[rev]
            else:
                return vcs_map.git_to_mercurial(rev)

        def mercurial_to_git(rev):
            if tokenized:
                return self.mercurial_to_tokenized_git[rev]
            else:
                return vcs_map.mercurial_to_git(rev)

        logger.info("Downloading previously found bug-introducing commits...")
        if db.is_old_version(db_path) or not db.exists(db_path):
            db.download(db_path, force=True)

        logger.info("Getting previously found bug-introducing commits...")
        prev_bug_introducing_commits = list(db.read(db_path))
        prev_bug_introducing_commits_nodes = set(
            bug_introducing_commit["bug_fixing_rev"]
            for bug_introducing_commit in prev_bug_introducing_commits
        )
        logger.info(
            f"Already classified {len(prev_bug_introducing_commits)} commits..."
        )

        hashes_to_ignore = set(commit["rev"] for commit in commits_to_ignore)

        with open("git_hashes_to_ignore", "w") as f:
            f.writelines(
                "{}\n".format(mercurial_to_git(commit["rev"]))
                for commit in commits_to_ignore
                if not tokenized or commit["rev"] in self.mercurial_to_tokenized_git
            )

        logger.info(f"{len(bug_fixing_commits)} commits to analyze")

        # Skip already found bug-introducing commits.
        bug_fixing_commits = [
            bug_fixing_commit
            for bug_fixing_commit in bug_fixing_commits
            if bug_fixing_commit["rev"] not in prev_bug_introducing_commits_nodes
        ]

        logger.info(
            f"{len(bug_fixing_commits)} commits left to analyze after skipping already analyzed ones"
        )

        bug_fixing_commits = [
            bug_fixing_commit
            for bug_fixing_commit in bug_fixing_commits
            if bug_fixing_commit["rev"] not in hashes_to_ignore
        ]
        logger.info(
            f"{len(bug_fixing_commits)} commits left to analyze after skipping the ones in the ignore list"
        )

        if tokenized:
            bug_fixing_commits = [
                bug_fixing_commit
                for bug_fixing_commit in bug_fixing_commits
                if bug_fixing_commit["rev"] in self.mercurial_to_tokenized_git
            ]
            logger.info(
                f"{len(bug_fixing_commits)} commits left to analyze after skipping the ones with no git hash"
            )

        # Analyze up to 500 commits at a time, to avoid the task running out of time.
        done = True
        if len(bug_fixing_commits) > 500:
            bug_fixing_commits = bug_fixing_commits[-500:]
            done = False

        with open("done", "w") as f:
            f.write(str(1 if done else 0))

        def _init(git_repo_dir):
            thread_local.git = GitRepository(git_repo_dir)

        def find_bic(bug_fixing_commit):
            logger.info("Analyzing {}...".format(bug_fixing_commit["rev"]))

            git_fix_revision = mercurial_to_git(bug_fixing_commit["rev"])

            commit = thread_local.git.get_commit(git_fix_revision)

            # Skip huge changes, we'll likely be wrong with them.
            if len(commit.modifications) > MAX_MODIFICATION_NUMBER:
                logger.info(
                    "Skipping {} as it is too big".format(bug_fixing_commit["rev"])
                )
                return None

            bug_introducing_modifications = thread_local.git.get_commits_last_modified_lines(
                commit, hashes_to_ignore_path=os.path.realpath("git_hashes_to_ignore")
            )

            logger.info(
                "Found {} for {}".format(
                    bug_introducing_modifications, bug_fixing_commit["rev"]
                )
            )

            bug_introducing_commits = []
            for bug_introducing_hashes in bug_introducing_modifications.values():
                for bug_introducing_hash in bug_introducing_hashes:
                    try:
                        bug_introducing_commits.append(
                            {
                                "bug_fixing_rev": bug_fixing_commit["rev"],
                                "bug_introducing_rev": git_to_mercurial(
                                    bug_introducing_hash
                                ),
                            }
                        )
                    except Exception as e:
                        # Skip commits that are in git but not in mercurial, as
                        # they are too old (older than "Free the lizard").
                        if not str(e).startswith("Missing git commit in the VCS map"):
                            raise

            # Add an empty result, just so that we don't reanalyze this again.
            if len(bug_introducing_commits) == 0:
                bug_introducing_commits.append(
                    {
                        "bug_fixing_rev": bug_fixing_commit["rev"],
                        "bug_introducing_rev": "",
                    }
                )

            return bug_introducing_commits

        with concurrent.futures.ThreadPoolExecutor(
            initializer=_init, initargs=(repo_dir,), max_workers=os.cpu_count() + 1
        ) as executor:
            bug_introducing_commit_futures = [
                executor.submit(find_bic, bug_fixing_commit)
                for bug_fixing_commit in bug_fixing_commits
            ]

            def results():
                for future in tqdm(
                    concurrent.futures.as_completed(bug_introducing_commit_futures),
                    total=len(bug_fixing_commits),
                ):
                    result = future.result()
                    if result is not None:
                        yield from result

            db.append(db_path, results())

        zstd_compress(db_path)
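
# Compare SZZ results against bugs with explicit regressed-by annotations:
# "perfect" means only correct regressors were found for a bug's fixes, "found"
# means at least one correct regressor, "misassigned" at least one wrong one.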
def evaluate(bug_introducing_commits):
    logger.info("Building bug -> commits map...")
    bug_to_commits_map = defaultdict(list)
    for commit in tqdm(repository.get_commits()):
        bug_to_commits_map[commit["bug_id"]].append(commit["node"])

    logger.info("Loading known regressors using regressed-by information...")
    known_regressors = {}
    for bug in tqdm(bugzilla.get_bugs()):
        if bug["regressed_by"]:
            known_regressors[bug["id"]] = bug["regressed_by"]
    logger.info(f"Loaded {len(known_regressors)} known regressors")

    fix_to_regressors_map = defaultdict(list)
    for bug_introducing_commit in bug_introducing_commits:
        fix_to_regressors_map[bug_introducing_commit["bug_fixing_rev"]].append(
            bug_introducing_commit["bug_introducing_rev"]
        )
    logger.info(f"{len(fix_to_regressors_map)} fixes linked to regressors")
    logger.info(
        f"{sum(len(regressors) for regressors in fix_to_regressors_map.values())} regressors linked to fixes"
    )

    logger.info("Measuring how many known regressors SZZ was able to find correctly...")
    all_regressors = 0
    perfect_regressors = 0
    found_regressors = 0
    misassigned_regressors = 0
    for bug_id, regressor_bugs in tqdm(known_regressors.items()):
        # Get all commits which fixed the bug.
        fix_commits = bug_to_commits_map[bug_id] if bug_id in bug_to_commits_map else []
        if len(fix_commits) == 0:
            continue

        # Skip the bug/regressor pair when we didn't analyze the commits fixing
        # the bug (since we certainly can't have found the regressor then).
        if not any(fix_commit in fix_to_regressors_map for fix_commit in fix_commits):
            continue

        # Get all commits linked to the regressor bugs.
        regressor_commits = []
        for regressor_bug in regressor_bugs:
            if regressor_bug not in bug_to_commits_map:
                continue

            regressor_commits += bug_to_commits_map[regressor_bug]

        if len(regressor_commits) == 0:
            continue

        found_good = False
        found_bad = False
        for fix_commit in fix_commits:
            # Check if we found at least one correct regressor.
            if any(
                regressor_commit in regressor_commits
                for regressor_commit in fix_to_regressors_map[fix_commit]
            ):
                found_good = True

            # Check if we found at least one wrong regressor.
            if any(
                regressor_commit not in regressor_commits
                for regressor_commit in fix_to_regressors_map[fix_commit]
            ):
                found_bad = True

        all_regressors += 1

        if found_good and not found_bad:
            perfect_regressors += 1
        if found_good:
            found_regressors += 1
        if found_bad:
            misassigned_regressors += 1

    logger.info(
        f"Perfectly found {perfect_regressors} regressors out of {all_regressors}"
    )
    logger.info(f"Found {found_regressors} regressors out of {all_regressors}")
    logger.info(
        f"Misassigned {misassigned_regressors} regressors out of {all_regressors}"
    )


def main():
    description = "Find bug-introducing commits from bug-fixing commits"
    parser = argparse.ArgumentParser(description=description)

    parser.add_argument("cache_root", help="Cache for repository clones.")
    parser.add_argument(
        "git_repo_url", help="URL to the git repository on which to run SZZ."
    )
    parser.add_argument(
        "git_repo_dir", help="Path where the git repository will be cloned."
    )
    parser.add_argument(
        "tokenized_git_repo_url",
        help="URL to the tokenized git repository on which to run SZZ.",
    )
    parser.add_argument(
        "tokenized_git_repo_dir",
        help="Path where the tokenized git repository will be cloned.",
    )

    args = parser.parse_args()

    regressor_finder = RegressorFinder(
        args.cache_root,
        args.git_repo_url,
        args.git_repo_dir,
        args.tokenized_git_repo_url,
        args.tokenized_git_repo_dir,
    )

    commits_to_ignore = regressor_finder.get_commits_to_ignore()

    bug_fixing_commits = regressor_finder.find_bug_fixing_commits()

    regressor_finder.find_bug_introducing_commits(
        bug_fixing_commits, commits_to_ignore, True
    )
    evaluate(db.read(TOKENIZED_BUG_INTRODUCING_COMMITS_DB))

    regressor_finder.find_bug_introducing_commits(
        bug_fixing_commits, commits_to_ignore, False
    )
    evaluate(db.read(BUG_INTRODUCING_COMMITS_DB))

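
# Example invocation (sketch; the script name is inferred from the artifact
# URLs above, and the positional arguments are placeholders):
#   python regressor_finder.py <cache_root> <git_repo_url> <git_repo_dir> \
#       <tokenized_git_repo_url> <tokenized_git_repo_dir>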
if __name__ == "__main__":
    main()