зеркало из https://github.com/mozilla/bugbug.git
Fetch private issue contents to replace public issue body and title in the dataset to account for moderation workflow (#2387)
This commit is contained in:
Родитель
750e6a73c5
Коммит
d1cb24fb78
|
@ -13,6 +13,7 @@ def rollback(issue, when=None):
|
|||
if (
|
||||
event["event"] == "renamed"
|
||||
and event["rename"]["from"] != "In the moderation queue."
|
||||
and event["rename"]["from"] != "Issue closed."
|
||||
):
|
||||
issue["title"] = event["rename"]["from"]
|
||||
|
||||
|
|
|
@ -8,9 +8,11 @@ import errno
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import socket
|
||||
import subprocess
|
||||
import tarfile
|
||||
import urllib.parse
|
||||
from collections import deque
|
||||
from contextlib import contextmanager
|
||||
from datetime import datetime
|
||||
|
@ -468,3 +470,29 @@ def get_hgmo_stack(branch: str, revision: str) -> List[bytes]:
|
|||
|
||||
def get_physical_cpu_count() -> int:
    """Return the CPU count psutil reports when logical cores are excluded."""
    physical_cores = psutil.cpu_count(logical=False)
    return physical_cores
|
||||
|
||||
|
||||
def extract_metadata(body: str) -> dict:
    """Extract metadata as dict from github issue body.

    Metadata items are HTML comments of the form ``<!-- @name: value -->``.
    Every item found in ``body`` is returned as a ``name -> value`` entry;
    when a name occurs more than once the last occurrence wins. An empty
    dict is returned when the body carries no metadata.

    Example metadata format: <!-- @public_url: *** -->
    """
    # The value group is lazy so each item stops at its own closing " -->",
    # even when several metadata comments share a single line.
    match_list = re.findall(r"<!--\s@(\w+):\s(.+?)\s-->", body)
    return dict(match_list)
|
||||
|
||||
|
||||
def extract_private(issue_body: str) -> Optional[tuple]:
    """Extract private issue information from public issue body

    Parse public issue body and extract private issue number and
    its owner/repository (webcompat repository usecase)

    Returns an ``(owner, repo, number)`` tuple of strings, or None when the
    body has no ``private_url`` metadata item or its URL path does not have
    the ``/<owner>/<repo>/<section>/<number>`` shape.
    """
    private_url = extract_metadata(issue_body).get("private_url", "").strip()
    private_issue_path = urllib.parse.urlparse(private_url).path

    # Ignore leading/trailing slashes so ".../issues/123/" is still accepted.
    segments = private_issue_path.strip("/").split("/")

    # A path with a different number of segments (or empty ones) cannot be
    # mapped to owner/repo/number; report "not found" instead of raising
    # ValueError from the unpacking below.
    if len(segments) != 4 or not all(segments):
        return None

    owner, repo, _, number = segments
    return owner, repo, number
|
||||
|
|
|
@ -245,6 +245,7 @@ tasks:
|
|||
- --owner=webcompat
|
||||
- --repo=web-bugs
|
||||
- --retrieve-events
|
||||
- --retrieve-private
|
||||
|
||||
artifacts:
|
||||
public/github_issues.json.zst:
|
||||
|
|
|
@ -5,16 +5,48 @@
|
|||
|
||||
import argparse
|
||||
from logging import getLogger
|
||||
from typing import List, Tuple
|
||||
|
||||
from bugbug import db, github
|
||||
from bugbug.utils import zstd_compress
|
||||
from bugbug.github import IssueDict
|
||||
from bugbug.utils import extract_private, zstd_compress
|
||||
|
||||
logger = getLogger(__name__)
|
||||
|
||||
|
||||
def replace_with_private(original_data: List[IssueDict]) -> Tuple[List[IssueDict], set]:
    """Swap in private issue content for automatically closed public issues.

    Public issues titled "Issue closed." whose body links to a private
    issue get their title and body overwritten in place with those of the
    private issue (moderation workflow in webcompat repository).

    Returns the list of issues that were overwritten and the set of their ids.
    """
    replaced_ids = set()
    replaced_issues = []

    for issue in original_data:
        # Only automatically closed issues are candidates.
        if issue["title"] != "Issue closed.":
            continue

        location = extract_private(issue["body"])
        if location is None:
            continue

        owner, repo, issue_number = location
        private_issue = github.fetch_issue_by_number(owner, repo, issue_number)
        if not private_issue:
            continue

        issue["title"] = private_issue["title"]
        issue["body"] = private_issue["body"]
        replaced_ids.add(issue["id"])
        replaced_issues.append(issue)

    return replaced_issues, replaced_ids
|
||||
|
||||
|
||||
class Retriever(object):
|
||||
def retrieve_issues(
|
||||
self, owner: str, repo: str, state: str, retrieve_events: bool
|
||||
self,
|
||||
owner: str,
|
||||
repo: str,
|
||||
state: str,
|
||||
retrieve_events: bool,
|
||||
retrieve_private: bool,
|
||||
) -> None:
|
||||
|
||||
last_modified = None
|
||||
|
@ -33,6 +65,12 @@ class Retriever(object):
|
|||
owner, repo, state, last_modified.isoformat(), retrieve_events
|
||||
)
|
||||
|
||||
if retrieve_private:
|
||||
logger.info(
|
||||
"Replacing contents of auto closed public issues with private issues content"
|
||||
)
|
||||
replace_with_private(data)
|
||||
|
||||
updated_ids = set(issue["id"] for issue in data)
|
||||
|
||||
logger.info(
|
||||
|
@ -46,6 +84,20 @@ class Retriever(object):
|
|||
logger.info("Retrieving all issues since last_modified is not available")
|
||||
github.download_issues(owner, repo, state, retrieve_events)
|
||||
|
||||
if retrieve_private:
|
||||
logger.info(
|
||||
"Replacing contents of auto closed public issues with private issues content"
|
||||
)
|
||||
|
||||
all_issues = list(github.get_issues())
|
||||
updated_issues, updated_ids = replace_with_private(all_issues)
|
||||
|
||||
logger.info(
|
||||
"Deleting public issues that were updated and saving updates"
|
||||
)
|
||||
github.delete_issues(lambda issue: issue["id"] in updated_ids)
|
||||
db.append(github.GITHUB_ISSUES_DB, updated_issues)
|
||||
|
||||
zstd_compress(github.GITHUB_ISSUES_DB)
|
||||
|
||||
|
||||
|
@ -75,12 +127,19 @@ def main() -> None:
|
|||
action="store_true",
|
||||
help="Whether to retrieve events for each issue.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--retrieve-private",
|
||||
action="store_true",
|
||||
help="Whether to retrieve private issue content (only webcompat repository usecase).",
|
||||
)
|
||||
|
||||
# Parse args to show the help if `--help` is passed
|
||||
args = parser.parse_args()
|
||||
|
||||
retriever = Retriever()
|
||||
retriever.retrieve_issues(args.owner, args.repo, args.state, args.retrieve_events)
|
||||
retriever.retrieve_issues(
|
||||
args.owner, args.repo, args.state, args.retrieve_events, args.retrieve_private
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -0,0 +1,94 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
from unittest import mock
|
||||
|
||||
import responses
|
||||
|
||||
from bugbug import github
|
||||
from bugbug.github import IssueDict
|
||||
from scripts import github_issue_retriever
|
||||
|
||||
# Replace the token lookup with a mock that returns a fixed value
# (presumably so the tests need no real GitHub credentials; TODO confirm).
github.get_token = mock.Mock(return_value="mocked_token")

# Body of an automatically closed public issue; the trailing HTML comment
# carries the @private_url metadata item pointing at the private issue.
PUBLIC_BODY = """
<p>Thanks for the report. We have closed this issue\n
automatically as we suspect it is invalid. If we made
a mistake, please\nfile a new issue and try to provide
more context.</p>\n
<!-- @private_url: https://github.com/webcompat/web-bugs-private/issues/12345 -->\n
"""
|
||||
|
||||
|
||||
def test_replace_with_private() -> None:
    """A closed public issue with a private link takes the private title and body."""
    closed_issue = IssueDict({"title": "Issue closed.", "body": PUBLIC_BODY, "id": 3456})
    open_issue = IssueDict(
        {"title": "example.com - test issue", "body": "issue body", "id": 3457}
    )
    data = [closed_issue, open_issue]

    private_issue = IssueDict(
        {
            "title": "www.example.com - actual title",
            "body": "<p>Actual body</p>",
            "id": 1,
        }
    )

    # Mock private issue request
    responses.add(
        responses.GET,
        "https://api.github.com/repos/webcompat/web-bugs-private/issues/12345",
        json=private_issue,
        status=200,
    )

    # Snapshot the public issue before the call and apply the expected overrides.
    expected = IssueDict(closed_issue.copy())
    expected["title"] = private_issue["title"]
    expected["body"] = private_issue["body"]

    updated_issues, updated_ids = github_issue_retriever.replace_with_private(data)

    assert len(updated_ids) == 1
    assert len(updated_issues) == 1
    assert len(data) == 2

    assert closed_issue["id"] in updated_ids
    # the public issue in the original list is changed in place
    assert data[0] == expected
    # the updated list contains the issue with private content
    assert updated_issues[0] == expected
|
||||
|
||||
|
||||
def test_replace_missing_private() -> None:
    """A closed public issue without a private link is left untouched."""
    closed_without_link = IssueDict(
        {"title": "Issue closed.", "body": "no private link", "id": 3459}
    )
    open_issue = IssueDict(
        {"title": "example.com - test issue 2", "body": "issue body", "id": 3458}
    )

    data = [closed_without_link, open_issue]
    # Snapshot taken before the call so in-place changes would be detected.
    expected = IssueDict(closed_without_link.copy())

    updated_issues, updated_ids = github_issue_retriever.replace_with_private(data)

    assert len(updated_ids) == 0
    assert len(updated_issues) == 0
    assert len(data) == 2
    assert data[0] == expected
|
|
@ -293,3 +293,37 @@ def test_extract_db_bad_format(tmp_path):
|
|||
|
||||
with pytest.raises(AssertionError):
|
||||
utils.extract_file(path)
|
||||
|
||||
|
||||
def test_extract_metadata() -> None:
    """Metadata comments are parsed into a dict; plain text yields an empty dict."""
    body = """
<!-- @private_url: https://github.com/webcompat/web-bugs-private/issues/12345 -->\n
"""

    parsed = utils.extract_metadata(body)
    assert parsed == {
        "private_url": "https://github.com/webcompat/web-bugs-private/issues/12345"
    }

    # A body without any metadata comment produces no entries.
    parsed = utils.extract_metadata("test")
    assert parsed == {}
|
||||
|
||||
|
||||
def test_extract_private_url() -> None:
    """The private URL in the body is split into owner, repository and issue number."""
    body = """
<p>Thanks for the report. We have closed this issue\n
automatically as we suspect it is invalid. If we made
a mistake, please\nfile a new issue and try to provide
more context.</p>\n
<!-- @private_url: https://github.com/webcompat/web-bugs-private/issues/12345 -->\n
"""
    owner_repo_number = utils.extract_private(body)
    assert owner_repo_number == ("webcompat", "web-bugs-private", "12345")
|
||||
|
||||
|
||||
def test_extract_private_url_empty() -> None:
    """A body without a private URL metadata item yields None."""
    body_without_metadata = """<p>Test content</p> """
    assert utils.extract_private(body_without_metadata) is None
|
||||
|
|
Загрузка…
Ссылка в новой задаче