Support cloning a revision's repository to build references to found issues (#1517)

* Add mercurial & robustcheckout to the bot image

* Add a method to clone a mercurial repo

* Use hglib only

* Update cli arguments helper

* Fixes

* Add tests for the mercurial helper
This commit is contained in:
Valentin Rigal 2023-05-03 14:43:05 +02:00 коммит произвёл Andi-Bogdan Postelnicu
Родитель 080b174789
Коммит a694e1bddd
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B2D1B467E22FB910
7 изменённых файлов: 192 добавлений и 10 удалений

Просмотреть файл

@ -1,3 +1,3 @@
[settings]
profile = black
known_first_party = code_review_backend,code_review_bot,code_review_tools,code_review_events
known_first_party = code_review_backend,code_review_bot,code_review_tools,code_review_events

Просмотреть файл

@ -51,8 +51,10 @@ def parse_cli():
)
parser.add_argument(
"--mercurial-repository",
help="Optional path to a mercurial repository matching the analyzed revision.\n"
"Improves reading the updated files, i.e. to compute the unique hash of an issue.",
help="Optional path to a up-to-date mercurial repository matching the analyzed revision.\n"
"Reduce the time required to read updated files, i.e. to compute the unique hash of multiple issues.\n"
"A clone is automatically performed when ingesting a revision and this option is unset, "
"except on a developer instance (where HGMO is used).",
type=Path,
default=None,
)

Просмотреть файл

@ -0,0 +1,89 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import fcntl
import os
import time
from contextlib import contextmanager
from pathlib import Path
from tempfile import TemporaryDirectory
import hglib
import structlog
logger = structlog.get_logger(__name__)
def hg_run(cmd):
    """
    Run a mercurial command without an hglib instance
    Useful for initial custom clones
    Redirects stdout & stderr to python's logger

    This code has been copied from the libmozevent library
    https://github.com/mozilla/libmozevent/blob/fd0b3689c50c3d14ac82302b31115d0046c6e7c8/libmozevent/utils.py#L77

    :param cmd: List of arguments appended to the hg executable path.
        Its first item is used as a prefix for the log messages.
    :return: The standard output returned by the final ``communicate()`` call.
    :raises hglib.error.CommandError: When the process exits with a non-zero code.
    """

    def _log_process(output, name):
        # The stream is set as non blocking below, so read() may give None
        # when there is nothing to consume yet
        out = output.read()
        if out is None:
            return

        # Display every non-empty line
        for line in filter(None, out.decode("utf-8").splitlines()):
            logger.info("{}: {}".format(name, line))

    # Start process
    main_cmd = cmd[0]
    proc = hglib.util.popen([hglib.HGPATH] + cmd)

    # Set process outputs as non blocking
    for output in (proc.stdout, proc.stderr):
        fcntl.fcntl(
            output.fileno(),
            fcntl.F_SETFL,
            fcntl.fcntl(output, fcntl.F_GETFL) | os.O_NONBLOCK,
        )

    # Forward both outputs to the logger for as long as the process is running
    while proc.poll() is None:
        _log_process(proc.stdout, main_cmd)
        _log_process(proc.stderr, "{} (err)".format(main_cmd))
        time.sleep(2)

    out, err = proc.communicate()
    if proc.returncode != 0:
        # No exception is being handled at this point, so exc_info is not
        # requested: it would only attach an empty traceback to the log entry
        logger.error("Mercurial {} failure".format(main_cmd), out=out, err=err)
        raise hglib.error.CommandError(cmd, proc.returncode, out, err)

    return out
def robust_checkout(repo_url, checkout_dir, sharebase_dir, branch):
    """
    Run the `robustcheckout` mercurial command through hg_run.

    :param repo_url: Source repository, passed as first positional argument.
    :param checkout_dir: Destination of the checkout, passed as second positional argument.
    :param sharebase_dir: Value of the `sharebase` option.
    :param branch: Value of the `branch` option.
    """
    # Keep this order, as options are passed to the command builder in that order
    options = {
        "purge": True,
        "sharebase": sharebase_dir,
        "branch": branch,
    }
    command = hglib.util.cmdbuilder(
        "robustcheckout", repo_url, checkout_dir, **options
    )
    hg_run(command)
@contextmanager
def clone_repository(repo_url, branch="tip"):
    """
    Context manager checking out a repository with robustcheckout inside
    a temporary directory.

    Yields the absolute path to the checkout folder. The temporary directory
    (checkout and share base) only lives for the duration of the context.
    """
    with TemporaryDirectory() as workdir:
        root = Path(workdir)

        # The checkout folder must not be created beforehand,
        # or Mercurial will complain about a missing .hg file
        checkout_dir = (root / "checkout").absolute()

        # The share base folder, on the other hand, is created upfront
        sharebase_dir = (root / "shared").absolute()
        sharebase_dir.mkdir()

        robust_checkout(repo_url, str(checkout_dir), str(sharebase_dir), branch)

        yield checkout_dir

Просмотреть файл

@ -3,6 +3,7 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from contextlib import nullcontext
from datetime import datetime, timedelta
import structlog
@ -12,6 +13,7 @@ from taskcluster.utils import stringDate
from code_review_bot import Level, stats
from code_review_bot.backend import BackendAPI
from code_review_bot.config import REPO_AUTOLAND, REPO_MOZILLA_CENTRAL, settings
from code_review_bot.mercurial import clone_repository
from code_review_bot.report.debug import DebugReporter
from code_review_bot.revisions import Revision
from code_review_bot.tasks.base import AnalysisTask, BaseTask, NoticeTask
@ -29,6 +31,9 @@ logger = structlog.get_logger(__name__)
TASKCLUSTER_NAMESPACE = "project.relman.{channel}.code-review.{name}"
TASKCLUSTER_INDEX_TTL = 7 # in days
# Max number of issues published to the backend at a time during the ingestion of a revision
BULK_ISSUE_CHUNKS = 100
class Workflow(object):
"""
@ -169,17 +174,30 @@ class Workflow(object):
self.backend_api.publish_revision(revision)
# Publish issues when there are some
if issues:
if self.mercurial_repository:
logger.info("Using the local repository to build issues")
if not issues:
logger.info("No issues for that revision")
return
context_manager = nullcontext(self.mercurial_repository)
# Do always clone the repository on production to speed up reading issues
if (
self.mercurial_repository is None
and settings.taskcluster.task_id != "local instance"
):
logger.info(
f"Cloning revision to build issues (checkout to {revision.mercurial_revision})"
)
context_manager = clone_repository(
repo_url=revision.repository, branch=revision.mercurial_revision
)
with context_manager as repo_path:
self.backend_api.publish_issues(
issues,
revision,
mercurial_repository=self.mercurial_repository,
bulk=100,
mercurial_repository=repo_path,
bulk=BULK_ISSUE_CHUNKS,
)
else:
logger.info("No issues for that revision")
def publish(self, revision, issues, task_failures, notices, reviewers):
"""

Просмотреть файл

@ -6,4 +6,7 @@ RUN cd /src/tools && python setup.py install
ADD bot /src/bot
RUN cd /src/bot && python setup.py install
# Add mercurial & robustcheckout
RUN /src/tools/docker/bootstrap-mercurial.sh
CMD ["code-review-bot"]

Просмотреть файл

@ -1,4 +1,5 @@
-e ../tools #egg=code-review-tools
influxdb==5.3.1
libmozdata==0.2.0
python-hglib==2.6.2
pyyaml==6.0

Просмотреть файл

@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from pathlib import PosixPath
from code_review_bot import mercurial
class STDOutputMock:
    """Minimal stand-in for a process output stream (stdout or stderr)."""

    # Text attributed to the stream; empty until something overwrites it
    content = ""

    def fileno(self):
        """Return a fixed, arbitrary file descriptor number."""
        return 4
class PopenMock:
    """
    Callable replacement for `hglib.util.popen`.

    Calling the instance records the command and returns the instance itself,
    which then plays the role of an already finished, successful process.
    """

    stdout = STDOutputMock()
    stderr = STDOutputMock()
    returncode = 0

    def __call__(self, command):
        # Remember the command so tests can assert on it
        self.command = command
        return self

    def poll(self):
        # Any value other than None means the process has terminated
        return True

    def communicate(self):
        # Store the fake outputs on the streams, then hand them back
        self.stdout.content = "Hello world"
        self.stderr.content = "An error occurred"
        return self.stdout.content, self.stderr.content
def test_hg_run(monkeypatch):
    """hg_run prefixes the command with the hg executable and reads both outputs."""
    fake_popen = PopenMock()
    monkeypatch.setattr("hglib.util.popen", fake_popen)

    mercurial.hg_run(["checkout", "https://hg.repo/", "--test"])

    expected_command = ["hg", "checkout", "https://hg.repo/", "--test"]
    assert fake_popen.command == expected_command
    # Both streams are filled by the communicate() call
    assert fake_popen.stdout.content == "Hello world"
    assert fake_popen.stderr.content == "An error occurred"
def test_clone_repository_context_manager(monkeypatch):
    """clone_repository yields a checkout path in a temporary folder removed on exit."""
    fake_popen = PopenMock()
    monkeypatch.setattr("hglib.util.popen", fake_popen)

    with mercurial.clone_repository(
        "https://hg.repo/", branch="default"
    ) as repo_checkout:
        # The yielded value is a path named "checkout" below the temp folder
        assert isinstance(repo_checkout, PosixPath)
        assert repo_checkout.stem == "checkout"
        assert str(repo_checkout.absolute()).startswith("/tmp/")

        # The checkout lives in a dedicated sub-folder, which exists for now
        parent_folder = repo_checkout.parent.absolute()
        assert str(parent_folder) != "/tmp"
        assert parent_folder.exists()

        # robustcheckout has been called with the expected options
        assert fake_popen.command == [
            "hg",
            "robustcheckout",
            b"--purge",
            f"--sharebase={parent_folder}/shared".encode(),
            b"--branch=default",
            b"--",
            "https://hg.repo/",
            str(repo_checkout),
        ]

    # Leaving the context removes the temporary folder
    assert not parent_folder.exists()