Support generating mirror repositories with comments removed

This commit is contained in:
Marco Castelluccio 2019-07-23 02:10:31 +02:00
Родитель fbaef0661d
Коммит ab048e0a6b
2 изменённых файлов: 34 добавлений и 13 удалений

Просмотреть файл

@@ -8,7 +8,7 @@ tasks:
minute: {$eval: 'now[14:16]'}
second: {$eval: 'now[17:19]'}
in:
- ID: microannotate-generate
- ID: microannotate-generate-tokenize
created: {$fromNow: ''}
deadline: {$fromNow: '72 hours'}
expires: {$fromNow: '1 month'}
@@ -22,6 +22,8 @@ tasks:
command:
- bugbug-microannotate-generate
- /cache/
- https://github.com/marco-c/gecko-dev-wordified
- --tokenize
artifacts:
public/done:
path: /done
@@ -38,12 +40,11 @@ tasks:
- notify.email.release-mgmt-analysis@mozilla.com.on-failed"
- notify.irc-channel.#bugbug.on-failed
metadata:
name: bugbug microannotate repository generator
description: bugbug microannotate repository generator
name: bugbug microannotate tokenized repository generator
description: bugbug microannotate tokenized repository generator
owner: release-mgmt-analysis@mozilla.com
source: https://github.com/mozilla/bugbug/raw/master/annotate-pipeline.yml
- ID: regressor-finder
created: {$fromNow: ''}
deadline: {$fromNow: '118 hours'}

Просмотреть файл

@@ -15,8 +15,11 @@ logger = getLogger(__name__)
class MicroannotateGenerator(object):
def __init__(self, cache_root):
def __init__(self, cache_root, repo_url, tokenize, remove_comments):
self.cache_root = cache_root
self.repo_url = repo_url
self.tokenize = tokenize
self.remove_comments = remove_comments
assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
self.repo_dir = os.path.join(cache_root, "mozilla-central")
@@ -29,22 +32,21 @@ class MicroannotateGenerator(object):
git_user = get_secret("GIT_USER")
git_password = get_secret("GIT_PASSWORD")
repo_url = "https://github.com/marco-c/gecko-dev-wordified"
repo_push_url = (
f"https://{git_user}:{git_password}@github.com/marco-c/gecko-dev-wordified"
repo_push_url = self.repo_url.replace(
"https://", f"https://{git_user}:{git_password}@"
)
git_repo_path = os.path.basename(repo_url)
git_repo_path = os.path.basename(self.repo_url)
retry(
lambda: subprocess.run(
["git", "clone", repo_url, git_repo_path], check=True
["git", "clone", self.repo_url, git_repo_path], check=True
)
)
try:
retry(
lambda: subprocess.run(
["git", "pull", repo_url, "master"],
["git", "pull", self.repo_url, "master"],
cwd=git_repo_path,
capture_output=True,
check=True,
@@ -55,7 +57,13 @@ class MicroannotateGenerator(object):
if b"Couldn't find remote ref master" in e.stdout:
pass
done = generator.generate(self.repo_dir, git_repo_path, limit=10000)
done = generator.generate(
self.repo_dir,
git_repo_path,
limit=10000,
tokenize=self.tokenize,
remove_comments=self.remove_comments,
)
with open("done", "w") as f:
f.write(str(1 if done else 0))
@@ -77,9 +85,21 @@ def main():
parser = argparse.ArgumentParser(description=description)
parser.add_argument("cache-root", help="Cache for repository clones.")
parser.add_argument("repo-url", help="Mirror repository URL.")
parser.add_argument(
"--tokenize", help="Enable word-level tokenization.", action="store_true"
)
parser.add_argument(
"--remove-comments", help="Enable comment removal.", action="store_true"
)
args = parser.parse_args()
generator = MicroannotateGenerator(getattr(args, "cache-root"))
generator = MicroannotateGenerator(
getattr(args, "cache-root"),
getattr(args, "repo-url"),
args.tokenize,
args.remove_comments,
)
generator.generate()