[inline_comments_data_collection] Run the script on Taskcluster (#4454)

This commit is contained in:
Benjamin Mah 2024-09-06 05:11:00 -04:00 коммит произвёл GitHub
Родитель ac48a79728
Коммит 23b60ecb75
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 55 добавлений и 6 удалений

Просмотреть файл

@ -27,6 +27,13 @@ db.register(
4,
)
# Local path of the JSON-lines database holding the fixed inline comments.
FIXED_COMMENTS_DB = "data/fixed_comments.json"
# Register the fixed-comments database under its OWN path. Registering it
# under REVISIONS_DB would rebind the revisions database to this artifact URL
# and leave FIXED_COMMENTS_DB unknown to the db module.
db.register(
    FIXED_COMMENTS_DB,
    "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.fixed_comments.latest/artifacts/public/fixed_comments.json.zst",
    1,
)
PHABRICATOR_API = None
TESTING_PROJECTS = {

Просмотреть файл

@ -234,6 +234,45 @@ tasks:
owner: release-mgmt-analysis@mozilla.com
source: ${repository}/raw/master/data-pipeline.yml
# Retrieves the fixed inline review comments and publishes them as a
# zstd-compressed artifact, indexed under project.bugbug.fixed_comments.*.
- ID: fixed-comments-retrieval
  created: { $fromNow: "" }
  deadline: { $fromNow: "2 days" }
  expires: { $fromNow: "1 year" }
  provisionerId: proj-bugbug
  workerType: compute-small
  dependencies:
    - revisions-retrieval
  payload:
    env:
      TC_SECRET_ID: project/bugbug/production
    maxRunTime: 86400
    image: mozilla/bugbug-base:${version}
    command:
      - "bugbug-fixed-comments"

    # Artifact names must match what consumers download: the database is
    # fetched from .../artifacts/public/fixed_comments.json.zst, so it must
    # not be published under the bugs.json.* names used by the bugs task.
    artifacts:
      public/fixed_comments.json.zst:
        path: /data/fixed_comments.json.zst
        type: file
      # NOTE(review): confirm the script actually writes
      # fixed_comments.json.version; only the .zst compression step is
      # visible in the retrieval script.
      public/fixed_comments.json.version:
        path: /data/fixed_comments.json.version
        type: file

    features:
      taskclusterProxy: true
  scopes:
    - "secrets:get:project/bugbug/production"
  routes:
    - notify.email.release-mgmt-analysis@mozilla.com.on-failed
    - notify.irc-channel.#bugbug.on-failed
    - index.project.bugbug.fixed_comments.${version}
    - index.project.bugbug.fixed_comments.latest
  metadata:
    name: bugbug fixed comments retrieval
    description: bugbug fixed comments retrieval
    owner: release-mgmt-analysis@mozilla.com
    source: ${repository}/raw/master/data-pipeline.yml
- ID: issues-retrieval
created: { $fromNow: "" }
deadline: { $fromNow: "2 days" }

Просмотреть файл

@ -1,13 +1,14 @@
import json
import logging
import os
import re
import orjson
import requests
from libmozdata.phabricator import PhabricatorAPI
from bugbug import phabricator
from bugbug.tools.code_review import PhabricatorReviewData
from bugbug.utils import get_secret
from bugbug.utils import get_secret, zstd_compress
review_data = PhabricatorReviewData()
@ -183,11 +184,13 @@ def process_comments(patch_threshold, diff_length_threshold):
def main():
    """Collect the fixed inline comments and store them as a compressed DB.

    Every record yielded by ``process_comments`` is written as one JSON
    document per line to ``phabricator.FIXED_COMMENTS_DB``; the resulting
    file is then compressed with ``zstd_compress``.
    """
    # Working directories: "patches" and "dataset" are kept from the previous
    # version of the script; "data" is the parent of FIXED_COMMENTS_DB.
    os.makedirs("patches", exist_ok=True)
    os.makedirs("dataset", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    # The file is opened in binary mode because orjson.dumps returns bytes;
    # writing a ``str`` (json.dumps(...) + "\n") to this handle would raise
    # TypeError. "wb" also truncates, so every run starts from an empty file
    # instead of appending to a previous run's output.
    with open(phabricator.FIXED_COMMENTS_DB, "wb") as dataset_file_handle:
        for data in process_comments(
            patch_threshold=1000, diff_length_threshold=5000
        ):
            dataset_file_handle.write(orjson.dumps(data) + b"\n")

    # Compress only after the handle is closed so all data is flushed to disk.
    zstd_compress(phabricator.FIXED_COMMENTS_DB)
if __name__ == "__main__":