зеркало из https://github.com/mozilla/bugbug.git
[inline_comments_data_collection] Run the script on Taskcluster (#4454)
This commit is contained in:
Родитель
ac48a79728
Коммит
23b60ecb75
|
@ -27,6 +27,13 @@ db.register(
|
|||
4,
|
||||
)
|
||||
|
||||
FIXED_COMMENTS_DB = "data/fixed_comments.json"
|
||||
# Register the fixed-comments database under its own path constant
# (FIXED_COMMENTS_DB), not REVISIONS_DB: each DB path needs its own
# registration, and the URL below points at the fixed_comments artifact.
# NOTE(review): the third argument is presumably the DB schema version —
# confirm against db.register.
db.register(
    FIXED_COMMENTS_DB,
    "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.fixed_comments.latest/artifacts/public/fixed_comments.json.zst",
    1,
)
|
||||
|
||||
PHABRICATOR_API = None
|
||||
|
||||
TESTING_PROJECTS = {
|
||||
|
|
|
@ -234,6 +234,45 @@ tasks:
|
|||
owner: release-mgmt-analysis@mozilla.com
|
||||
source: ${repository}/raw/master/data-pipeline.yml
|
||||
|
||||
- ID: fixed-comments-retrieval
  # Runs the `bugbug-fixed-comments` command and publishes the resulting
  # compressed dataset as task artifacts, indexed under
  # project.bugbug.fixed_comments.{version,latest}.
  created: { $fromNow: "" }
  deadline: { $fromNow: "2 days" }
  expires: { $fromNow: "1 year" }
  provisionerId: proj-bugbug
  workerType: compute-small
  dependencies:
    - revisions-retrieval
  payload:
    env:
      TC_SECRET_ID: project/bugbug/production
    maxRunTime: 86400
    image: mozilla/bugbug-base:${version}
    command:
      - "bugbug-fixed-comments"

    artifacts:
      # Artifact names must match the path consumers download through the
      # index route:
      # project.bugbug.fixed_comments.latest/artifacts/public/fixed_comments.json.zst
      public/fixed_comments.json.zst:
        path: /data/fixed_comments.json.zst
        type: file
      # NOTE(review): the .version artifact is presumably looked up next to
      # the .zst one under the same base name — confirm against the DB
      # download code.
      public/fixed_comments.json.version:
        path: /data/fixed_comments.json.version
        type: file

    features:
      taskclusterProxy: true
  scopes:
    - "secrets:get:project/bugbug/production"
  routes:
    - notify.email.release-mgmt-analysis@mozilla.com.on-failed
    - notify.irc-channel.#bugbug.on-failed
    - index.project.bugbug.fixed_comments.${version}
    - index.project.bugbug.fixed_comments.latest
  metadata:
    name: bugbug fixed comments retrieval
    description: bugbug fixed comments retrieval
    owner: release-mgmt-analysis@mozilla.com
    source: ${repository}/raw/master/data-pipeline.yml
|
||||
|
||||
- ID: issues-retrieval
|
||||
created: { $fromNow: "" }
|
||||
deadline: { $fromNow: "2 days" }
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
|
||||
import orjson
|
||||
import requests
|
||||
from libmozdata.phabricator import PhabricatorAPI
|
||||
|
||||
from bugbug import phabricator
|
||||
from bugbug.tools.code_review import PhabricatorReviewData
|
||||
from bugbug.utils import get_secret
|
||||
from bugbug.utils import get_secret, zstd_compress
|
||||
|
||||
review_data = PhabricatorReviewData()
|
||||
|
||||
|
@ -183,11 +184,13 @@ def process_comments(patch_threshold, diff_length_threshold):
|
|||
|
||||
def main():
|
||||
os.makedirs("patches", exist_ok=True)
|
||||
os.makedirs("dataset", exist_ok=True)
|
||||
dataset_file_path = "dataset/inline_comment_dataset2.json"
|
||||
with open(dataset_file_path, "a") as dataset_file_handle:
|
||||
os.makedirs("data", exist_ok=True)
|
||||
|
||||
with open(phabricator.FIXED_COMMENTS_DB, "wb") as dataset_file_handle:
|
||||
for data in process_comments(patch_threshold=1000, diff_length_threshold=5000):
|
||||
dataset_file_handle.write(json.dumps(data) + "\n")
|
||||
dataset_file_handle.write(orjson.dumps(data) + b"\n")
|
||||
|
||||
zstd_compress(phabricator.FIXED_COMMENTS_DB)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
Загрузка…
Ссылка в новой задаче