Support generating mirror repositories with comments removed

This commit is contained in:
Marco Castelluccio 2019-07-23 02:10:31 +02:00
Родитель fbaef0661d
Коммит ab048e0a6b
2 изменённых файла: 34 добавления и 13 удалений

Просмотреть файл

@@ -8,7 +8,7 @@ tasks:
minute: {$eval: 'now[14:16]'} minute: {$eval: 'now[14:16]'}
second: {$eval: 'now[17:19]'} second: {$eval: 'now[17:19]'}
in: in:
- ID: microannotate-generate - ID: microannotate-generate-tokenize
created: {$fromNow: ''} created: {$fromNow: ''}
deadline: {$fromNow: '72 hours'} deadline: {$fromNow: '72 hours'}
expires: {$fromNow: '1 month'} expires: {$fromNow: '1 month'}
@@ -22,6 +22,8 @@ tasks:
command: command:
- bugbug-microannotate-generate - bugbug-microannotate-generate
- /cache/ - /cache/
- https://github.com/marco-c/gecko-dev-wordified
- --tokenize
artifacts: artifacts:
public/done: public/done:
path: /done path: /done
@@ -38,12 +40,11 @@ tasks:
- notify.email.release-mgmt-analysis@mozilla.com.on-failed" - notify.email.release-mgmt-analysis@mozilla.com.on-failed"
- notify.irc-channel.#bugbug.on-failed - notify.irc-channel.#bugbug.on-failed
metadata: metadata:
name: bugbug microannotate repository generator name: bugbug microannotate tokenized repository generator
description: bugbug microannotate repository generator description: bugbug microannotate tokenized repository generator
owner: release-mgmt-analysis@mozilla.com owner: release-mgmt-analysis@mozilla.com
source: https://github.com/mozilla/bugbug/raw/master/annotate-pipeline.yml source: https://github.com/mozilla/bugbug/raw/master/annotate-pipeline.yml
- ID: regressor-finder - ID: regressor-finder
created: {$fromNow: ''} created: {$fromNow: ''}
deadline: {$fromNow: '118 hours'} deadline: {$fromNow: '118 hours'}

Просмотреть файл

@@ -15,8 +15,11 @@ logger = getLogger(__name__)
class MicroannotateGenerator(object): class MicroannotateGenerator(object):
def __init__(self, cache_root): def __init__(self, cache_root, repo_url, tokenize, remove_comments):
self.cache_root = cache_root self.cache_root = cache_root
self.repo_url = repo_url
self.tokenize = tokenize
self.remove_comments = remove_comments
assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir." assert os.path.isdir(cache_root), f"Cache root {cache_root} is not a dir."
self.repo_dir = os.path.join(cache_root, "mozilla-central") self.repo_dir = os.path.join(cache_root, "mozilla-central")
@@ -29,22 +32,21 @@ class MicroannotateGenerator(object):
git_user = get_secret("GIT_USER") git_user = get_secret("GIT_USER")
git_password = get_secret("GIT_PASSWORD") git_password = get_secret("GIT_PASSWORD")
repo_url = "https://github.com/marco-c/gecko-dev-wordified" repo_push_url = self.repo_url.replace(
repo_push_url = ( "https://", f"https://{git_user}:{git_password}@"
f"https://{git_user}:{git_password}@github.com/marco-c/gecko-dev-wordified"
) )
git_repo_path = os.path.basename(repo_url) git_repo_path = os.path.basename(self.repo_url)
retry( retry(
lambda: subprocess.run( lambda: subprocess.run(
["git", "clone", repo_url, git_repo_path], check=True ["git", "clone", self.repo_url, git_repo_path], check=True
) )
) )
try: try:
retry( retry(
lambda: subprocess.run( lambda: subprocess.run(
["git", "pull", repo_url, "master"], ["git", "pull", self.repo_url, "master"],
cwd=git_repo_path, cwd=git_repo_path,
capture_output=True, capture_output=True,
check=True, check=True,
@@ -55,7 +57,13 @@ class MicroannotateGenerator(object):
if b"Couldn't find remote ref master" in e.stdout: if b"Couldn't find remote ref master" in e.stdout:
pass pass
done = generator.generate(self.repo_dir, git_repo_path, limit=10000) done = generator.generate(
self.repo_dir,
git_repo_path,
limit=10000,
tokenize=self.tokenize,
remove_comments=self.remove_comments,
)
with open("done", "w") as f: with open("done", "w") as f:
f.write(str(1 if done else 0)) f.write(str(1 if done else 0))
@@ -77,9 +85,21 @@ def main():
parser = argparse.ArgumentParser(description=description) parser = argparse.ArgumentParser(description=description)
parser.add_argument("cache-root", help="Cache for repository clones.") parser.add_argument("cache-root", help="Cache for repository clones.")
parser.add_argument("repo-url", help="Mirror repository URL.")
parser.add_argument(
"--tokenize", help="Enable word-level tokenization.", action="store_true"
)
parser.add_argument(
"--remove-comments", help="Enable comment removal.", action="store_true"
)
args = parser.parse_args() args = parser.parse_args()
generator = MicroannotateGenerator(getattr(args, "cache-root")) generator = MicroannotateGenerator(
getattr(args, "cache-root"),
getattr(args, "repo-url"),
args.tokenize,
args.remove_comments,
)
generator.generate() generator.generate()