Add an option to use a single process for mining commits

This commit is contained in:
Marco Castelluccio 2020-03-13 17:18:37 +01:00
Родитель 99b09584ea
Коммит 5656b422a3
2 изменённых файла: 30 добавлений и 16 удалений

Просмотреть файл

@@ -912,7 +912,7 @@ def hg_log_multi(repo_dir, revs):
return commits
def download_commits(repo_dir, rev_start=0, save=True):
def download_commits(repo_dir, rev_start=0, save=True, use_single_process=False):
with hglib.open(repo_dir) as hg:
revs = get_revs(hg, rev_start)
if len(revs) == 0:
@@ -921,9 +921,14 @@ def download_commits(repo_dir, rev_start=0, save=True):
first_pushdate = hg_log(hg, [b"0"])[0].pushdate
print(f"Mining {len(revs)} commits using {os.cpu_count()} processes...")
print(f"Mining {len(revs)} commits...")
commits = hg_log_multi(repo_dir, revs)
if not use_single_process:
print(f"Using {os.cpu_count()} processes...")
commits = hg_log_multi(repo_dir, revs)
else:
with hglib.open(repo_dir) as hg:
commits = hg_log(hg, revs)
print("Downloading file->component mapping...")
@@ -933,7 +938,7 @@ def download_commits(repo_dir, rev_start=0, save=True):
commits_num = len(commits)
print(f"Mining {commits_num} commits using {os.cpu_count()} processes...")
print(f"Mining {commits_num} commits...")
global rs_parsepatch
import rs_parsepatch
@@ -943,12 +948,16 @@ def download_commits(repo_dir, rev_start=0, save=True):
global code_analysis_server
code_analysis_server = rust_code_analysis_server.RustCodeAnalysisServer()
with concurrent.futures.ProcessPoolExecutor(
initializer=_init_process, initargs=(repo_dir,)
) as executor:
commits = executor.map(_transform, commits, chunksize=64)
commits = tqdm(commits, total=commits_num)
commits = list(commits)
if not use_single_process:
with concurrent.futures.ProcessPoolExecutor(
initializer=_init_process, initargs=(repo_dir,)
) as executor:
commits = executor.map(_transform, commits, chunksize=64)
commits = tqdm(commits, total=commits_num)
commits = list(commits)
else:
with hglib.open(repo_dir) as hg:
commits = [transform(hg, repo_dir, c) for c in commits]
code_analysis_server.terminate()

Просмотреть файл

@@ -365,7 +365,8 @@ def test_download_component_mapping():
assert repository.path_to_component["Cargo.lock"] == "Firefox Build System::General"
def test_download_commits(fake_hg_repo):
@pytest.mark.parametrize("use_single_process", [True, False])
def test_download_commits(fake_hg_repo, use_single_process):
hg, local, remote = fake_hg_repo
# Allow using the local code analysis server.
@@ -400,7 +401,7 @@ def test_download_commits(fake_hg_repo):
hg.push(dest=bytes(remote, "ascii"))
copy_pushlog_database(remote, local)
commits = repository.download_commits(local)
commits = repository.download_commits(local, use_single_process=use_single_process)
assert len(commits) == 0
commits = list(repository.get_commits())
assert len(commits) == 0
@@ -413,7 +414,7 @@ def test_download_commits(fake_hg_repo):
hg.push(dest=bytes(remote, "ascii"))
copy_pushlog_database(remote, local)
commits = repository.download_commits(local)
commits = repository.download_commits(local, use_single_process=use_single_process)
assert len(commits) == 1
commits = list(repository.get_commits())
assert len(commits) == 1
@@ -429,7 +430,9 @@ def test_download_commits(fake_hg_repo):
hg.push(dest=bytes(remote, "ascii"))
copy_pushlog_database(remote, local)
commits = repository.download_commits(local, revision3)
commits = repository.download_commits(
local, revision3, use_single_process=use_single_process
)
assert len(commits) == 1
commits = list(repository.get_commits())
assert len(commits) == 2
@@ -442,13 +445,15 @@ def test_download_commits(fake_hg_repo):
os.remove("data/commits.json")
shutil.rmtree("data/commit_experiences.lmdb")
commits = repository.download_commits(local, f"children({revision2})")
commits = repository.download_commits(
local, f"children({revision2})", use_single_process=use_single_process
)
assert len(commits) == 1
assert len(list(repository.get_commits())) == 1
os.remove("data/commits.json")
shutil.rmtree("data/commit_experiences.lmdb")
commits = repository.download_commits(local)
commits = repository.download_commits(local, use_single_process=use_single_process)
assert len(list(repository.get_commits())) == 2