Store file-to-component mapping in an LMDB database instead of keeping it in memory

This reduces the peak memory usage while mining commits by ~150 MB.

Should help with #1374
This commit is contained in:
Marco Castelluccio 2020-03-18 23:39:28 +01:00
Родитель ed5e5bb97d
Коммит e5cec94e30
2 изменённых файлов: 57 добавлений и 37 удалений

Просмотреть файл

@ -179,7 +179,11 @@ class Commit:
self.files = files
self.file_copies = file_copies
self.components = list(
set(path_to_component[path] for path in files if path in path_to_component)
set(
path_to_component[path.encode("utf-8")].tobytes().decode("utf-8")
for path in files
if path.encode("utf-8") in path_to_component
)
)
self.directories = get_directories(files)
return self
@ -853,23 +857,29 @@ def set_commits_to_ignore(repo_dir, commits):
commit.ignored = should_ignore(commit)
def download_component_mapping():
def download_component_mapping(save=True):
global path_to_component
path_to_component = LMDBDict("data/component_mapping.lmdb")
if path_to_component is not None:
return
if save:
utils.download_check_etag(
"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
"data/component_mapping.json",
)
utils.download_check_etag(
"https://firefox-ci-tc.services.mozilla.com/api/index/v1/task/gecko.v2.mozilla-central.latest.source.source-bugzilla-info/artifacts/public/components.json",
"data/component_mapping.json",
)
with open("data/component_mapping.json", "r") as f:
data = json.load(f)
with open("data/component_mapping.json", "r") as f:
path_to_component = json.load(f)
for path, component in data.items():
path_to_component[path.encode("utf-8")] = "::".join(component).encode(
"utf-8"
)
path_to_component = {
path: "::".join(component) for path, component in path_to_component.items()
}
def close_component_mapping():
global path_to_component
path_to_component.close()
path_to_component = None
def hg_log_multi(repo_dir, revs):
@ -924,7 +934,7 @@ def download_commits(repo_dir, rev_start=0, save=True, use_single_process=False)
print("Downloading file->component mapping...")
download_component_mapping()
download_component_mapping(save)
set_commits_to_ignore(repo_dir, commits)
@ -958,6 +968,8 @@ def download_commits(repo_dir, rev_start=0, save=True, use_single_process=False)
if save:
db.append(COMMITS_DB, commits)
close_component_mapping()
return commits

Просмотреть файл

@ -325,8 +325,8 @@ def test_download_component_mapping():
json={},
)
repository.download_component_mapping()
assert len(repository.path_to_component) == 0
repository.download_component_mapping(True)
repository.close_component_mapping()
repository.path_to_component = None
responses.reset()
@ -348,15 +348,19 @@ def test_download_component_mapping():
)
repository.download_component_mapping()
assert len(repository.path_to_component) == 2
assert repository.path_to_component["AUTHORS"] == "mozilla.org::Licensing"
assert repository.path_to_component["Cargo.lock"] == "Firefox Build System::General"
assert repository.path_to_component[b"AUTHORS"] == b"mozilla.org::Licensing"
assert (
repository.path_to_component[b"Cargo.lock"] == b"Firefox Build System::General"
)
repository.close_component_mapping()
responses.reset()
repository.download_component_mapping()
assert len(repository.path_to_component) == 2
assert repository.path_to_component["AUTHORS"] == "mozilla.org::Licensing"
assert repository.path_to_component["Cargo.lock"] == "Firefox Build System::General"
repository.download_component_mapping(False)
assert repository.path_to_component[b"AUTHORS"] == b"mozilla.org::Licensing"
assert (
repository.path_to_component[b"Cargo.lock"] == b"Firefox Build System::General"
)
repository.close_component_mapping()
repository.path_to_component = None
responses.reset()
@ -367,10 +371,12 @@ def test_download_component_mapping():
headers={"ETag": "101"},
)
repository.download_component_mapping()
assert len(repository.path_to_component) == 2
assert repository.path_to_component["AUTHORS"] == "mozilla.org::Licensing"
assert repository.path_to_component["Cargo.lock"] == "Firefox Build System::General"
repository.download_component_mapping(True)
assert repository.path_to_component[b"AUTHORS"] == b"mozilla.org::Licensing"
assert (
repository.path_to_component[b"Cargo.lock"] == b"Firefox Build System::General"
)
repository.close_component_mapping()
@pytest.mark.parametrize("use_single_process", [True, False])
@ -489,6 +495,8 @@ def test_get_directories():
def test_set_commits_to_ignore(tmpdir):
tmp_path = tmpdir.strpath
repository.path_to_component = {}
with open(os.path.join(tmp_path, ".hg-annotate-ignore-revs"), "w") as f:
f.write("commit1\ncommit2\n8ba995b74e18334ab3707f27e9eb8f4e37ba3d29\n")
@ -534,11 +542,11 @@ def test_set_commits_to_ignore(tmpdir):
def test_calculate_experiences():
repository.path_to_component = {
"dom/file1.cpp": "Core::DOM",
"dom/file1copied.cpp": "Core::DOM",
"dom/file2.cpp": "Core::Layout",
"apps/file1.jsm": "Firefox::Boh",
"apps/file2.jsm": "Firefox::Boh",
b"dom/file1.cpp": memoryview(b"Core::DOM"),
b"dom/file1copied.cpp": memoryview(b"Core::DOM"),
b"dom/file2.cpp": memoryview(b"Core::Layout"),
b"apps/file1.jsm": memoryview(b"Firefox::Boh"),
b"apps/file2.jsm": memoryview(b"Firefox::Boh"),
}
commits = {
@ -1010,11 +1018,11 @@ def test_calculate_experiences():
def test_calculate_experiences_no_save():
repository.path_to_component = {
"dom/file1.cpp": "Core::DOM",
"dom/file1copied.cpp": "Core::DOM",
"dom/file2.cpp": "Core::Layout",
"apps/file1.jsm": "Firefox::Boh",
"apps/file2.jsm": "Firefox::Boh",
b"dom/file1.cpp": memoryview(b"Core::DOM"),
b"dom/file1copied.cpp": memoryview(b"Core::DOM"),
b"dom/file2.cpp": memoryview(b"Core::Layout"),
b"apps/file1.jsm": memoryview(b"Firefox::Boh"),
b"apps/file2.jsm": memoryview(b"Firefox::Boh"),
}
commits = {