Mirror of https://github.com/mozilla/bugbug.git
Add support for multiple DBs for github (#2465)
Fixes #2460

Co-authored-by: Marco Castelluccio <mcastelluccio@mozilla.com>
Parent: 819358b139
Commit: 627e182061
bugbug/github.py | 205 lines changed
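
In short: the module-level helpers in bugbug/github.py, previously hard-wired to the single GITHUB_ISSUES_DB, are folded into a Github class that derives a per-repository database path and registers it on construction. A minimal usage sketch based on this diff (the webcompat/web-bugs values mirror the tests below; any owner/repo pair should work the same way):

    from bugbug.github import Github

    # Each (owner, repo) pair gets its own issues DB, e.g.
    # data/github_webcompat_web-bugs_issues.json, registered on first use.
    github = Github(owner="webcompat", repo="web-bugs", state="all", retrieve_events=False)

    github.download_issues()           # fetch all issues into the per-repo DB
    for issue in github.get_issues():  # read them back from the DB
        print(issue["number"])
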
bugbug/github.py:

@@ -14,141 +14,142 @@ from bugbug.utils import get_secret
 logger = logging.getLogger(__name__)

-GITHUB_ISSUES_DB = "data/github_issues.json"
-db.register(
-    GITHUB_ISSUES_DB,
-    "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_github_issues.latest/artifacts/public/github_issues.json.zst",
-    1,
-)
-
 IssueDict = NewType("IssueDict", dict)

+DB_VERSION = 1
+DB_URL = "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_github_{}_{}_issues.latest/artifacts/public/github_{}_{}_issues.json.zst"
+
 PER_PAGE = 100
 # Rate limit period in seconds
 RATE_LIMIT_PERIOD = 900


-def get_issues() -> Iterator[IssueDict]:
-    yield from db.read(GITHUB_ISSUES_DB)
+class Github:
+    def __init__(
+        self, owner: str, repo: str, state: str = "all", retrieve_events: bool = False
+    ) -> None:
+        self.owner = owner
+        self.repo = repo
+        self.state = state
+        self.retrieve_events = retrieve_events

+        self.db_path = "data/github_{}_{}_issues.json".format(self.owner, self.repo)

-def delete_issues(match: Callable[[IssueDict], bool]) -> None:
-    db.delete(GITHUB_ISSUES_DB, match)
+        if not db.is_registered(self.db_path):
+            db.register(
+                self.db_path,
+                DB_URL.format(self.owner, self.repo, self.owner, self.repo),
+                DB_VERSION,
+            )

+    def get_issues(self) -> Iterator[IssueDict]:
+        yield from db.read(self.db_path)

-@sleep_and_retry
-@limits(calls=1200, period=RATE_LIMIT_PERIOD)
-def api_limit():
-    # Allow a limited number of requests to account for rate limiting
-    pass
+    def delete_issues(self, match: Callable[[IssueDict], bool]) -> None:
+        db.delete(self.db_path, match)

+    @sleep_and_retry
+    @limits(calls=1200, period=RATE_LIMIT_PERIOD)
+    def api_limit(self):
+        # Allow a limited number of requests to account for rate limiting
+        pass

-def get_token() -> str:
-    return get_secret("GITHUB_TOKEN")
+    def get_token(self) -> str:
+        return get_secret("GITHUB_TOKEN")

+    def fetch_events(self, events_url: str) -> list:
+        self.api_limit()
+        logger.info(f"Fetching {events_url}")
+        headers = {"Authorization": "token {}".format(self.get_token())}
+        response = requests.get(events_url, headers=headers)
+        response.raise_for_status()
+        events_raw = response.json()
+        return events_raw

-def fetch_events(events_url: str) -> list:
-    api_limit()
-    logger.info(f"Fetching {events_url}")
-    headers = {"Authorization": "token {}".format(get_token())}
-    response = requests.get(events_url, headers=headers)
-    response.raise_for_status()
-    events_raw = response.json()
-    return events_raw
-
-
-def fetch_issues(
-    url: str, retrieve_events: bool, params: dict = None
-) -> Tuple[List[IssueDict], dict]:
-    api_limit()
-    headers = {"Authorization": "token {}".format(get_token())}
-    response = requests.get(url, params=params, headers=headers)
-    response.raise_for_status()
-    data = response.json()
-
-    # If only one issue is requested, add it to a list
-    if isinstance(data, dict):
-        data = [data]
-
-    logger.info(f"Fetching {url}")
-
-    if retrieve_events:
-        for item in data:
-            events = fetch_events(item["events_url"])
-            item.update({"events": events})
-
-    return data, response.links
-
-
-def get_start_page() -> int:
-    # Determine next page to fetch based on number of downloaded issues
-    issues = get_issues()
-    count = sum(1 for _ in issues)
-    return int(count / PER_PAGE) + 1
-
-
-def fetch_issues_updated_since_timestamp(
-    owner: str, repo: str, state: str, since: str, retrieve_events: bool = False
-) -> List[IssueDict]:
-    # Fetches changed and new issues since a specified timestamp
-    url = "https://api.github.com/repos/{}/{}/issues".format(owner, repo)
-
-    params = {"state": state, "since": since, "per_page": PER_PAGE, "page": 1}
-
-    data, response_links = fetch_issues(
-        url=url, retrieve_events=retrieve_events, params=params
-    )
-
-    # Fetch next page
-    while "next" in response_links.keys():
-        next_page_data, response_links = fetch_issues(
-            response_links["next"]["url"], retrieve_events
-        )
-        data += next_page_data
-
-    logger.info("Done fetching updates")
-
-    return data
-
-
-def download_issues(
-    owner: str, repo: str, state: str, retrieve_events: bool = False
-) -> None:
-    # Fetches all issues sorted by date of creation in ascending order
-    url = "https://api.github.com/repos/{}/{}/issues".format(owner, repo)
-    start_page = get_start_page()
-
-    params = {
-        "state": state,
-        "sort": "created",
-        "direction": "asc",
-        "per_page": PER_PAGE,
-        "page": start_page,
-    }
-
-    data, response_links = fetch_issues(
-        url=url, retrieve_events=retrieve_events, params=params
-    )
-
-    db.append(GITHUB_ISSUES_DB, data)
-    # Fetch next page
-    while "next" in response_links.keys():
-        next_page_data, response_links = fetch_issues(
-            response_links["next"]["url"], retrieve_events
-        )
-        db.append(GITHUB_ISSUES_DB, next_page_data)
-
-    logger.info("Done downloading")
-
-
-def fetch_issue_by_number(
-    owner: str, repo: str, issue_number: int, retrieve_events: bool = False
-) -> IssueDict:
-    # Fetches an issue by id
-    url = "https://api.github.com/repos/{}/{}/issues/{}".format(
-        owner, repo, issue_number
-    )
-
-    data = fetch_issues(url=url, retrieve_events=retrieve_events)
-
-    return data[0][0]
+    def fetch_issues(
+        self, url: str, retrieve_events: bool, params: dict = None
+    ) -> Tuple[List[IssueDict], dict]:
+        self.api_limit()
+        headers = {"Authorization": "token {}".format(self.get_token())}
+        response = requests.get(url, params=params, headers=headers)
+        response.raise_for_status()
+        data = response.json()
+
+        # If only one issue is requested, add it to a list
+        if isinstance(data, dict):
+            data = [data]
+
+        logger.info(f"Fetching {url}")
+
+        if retrieve_events:
+            for item in data:
+                events = self.fetch_events(item["events_url"])
+                item.update({"events": events})
+
+        return data, response.links
+
+    def get_start_page(self) -> int:
+        # Determine next page to fetch based on number of downloaded issues
+        issues = self.get_issues()
+        count = sum(1 for _ in issues)
+        return int(count / PER_PAGE) + 1
+
+    def fetch_issues_updated_since_timestamp(self, since: str) -> List[IssueDict]:
+        # Fetches changed and new issues since a specified timestamp
+        url = "https://api.github.com/repos/{}/{}/issues".format(self.owner, self.repo)
+
+        params = {"state": self.state, "since": since, "per_page": PER_PAGE, "page": 1}
+
+        data, response_links = self.fetch_issues(
+            url=url, retrieve_events=self.retrieve_events, params=params
+        )
+
+        # Fetch next page
+        while "next" in response_links.keys():
+            next_page_data, response_links = self.fetch_issues(
+                response_links["next"]["url"], self.retrieve_events
+            )
+            data += next_page_data
+
+        logger.info("Done fetching updates")
+
+        return data
+
+    def download_issues(self) -> None:
+        # Fetches all issues sorted by date of creation in ascending order
+        url = "https://api.github.com/repos/{}/{}/issues".format(self.owner, self.repo)
+        start_page = self.get_start_page()
+
+        params = {
+            "state": self.state,
+            "sort": "created",
+            "direction": "asc",
+            "per_page": PER_PAGE,
+            "page": start_page,
+        }
+
+        data, response_links = self.fetch_issues(
+            url=url, retrieve_events=self.retrieve_events, params=params
+        )
+
+        db.append(self.db_path, data)
+        # Fetch next page
+        while "next" in response_links.keys():
+            next_page_data, response_links = self.fetch_issues(
+                response_links["next"]["url"], self.retrieve_events
+            )
+            db.append(self.db_path, next_page_data)
+
+        logger.info("Done downloading")
+
+    def fetch_issue_by_number(
+        self, owner: str, repo: str, issue_number: int, retrieve_events: bool = False
+    ) -> IssueDict:
+        # Fetches an issue by id
+        url = "https://api.github.com/repos/{}/{}/issues/{}".format(
+            owner, repo, issue_number
+        )

+        data = self.fetch_issues(url=url, retrieve_events=retrieve_events)
+
+        return data[0][0]
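
For reference, the `while "next" in response_links.keys()` loops above rely on requests parsing the HTTP `Link` header into `response.links` (a dict keyed by `rel`, each value carrying a `url`). A standalone sketch of the same pagination pattern, with an illustrative URL:

    import requests

    def fetch_all(url: str, params: dict = None) -> list:
        # Follow GitHub-style Link headers until no rel="next" remains.
        response = requests.get(url, params=params)
        response.raise_for_status()
        data = response.json()
        while "next" in response.links:
            response = requests.get(response.links["next"]["url"])
            response.raise_for_status()
            data += response.json()
        return data
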
@@ -23,7 +23,8 @@ from sklearn.metrics import precision_recall_fscore_support
 from sklearn.model_selection import cross_validate, train_test_split
 from tabulate import tabulate

-from bugbug import bugzilla, db, github, repository
+from bugbug import bugzilla, db, repository
+from bugbug.github import Github
 from bugbug.nlp import SpacyVectorizer
 from bugbug.utils import split_tuple_generator, to_array

@@ -754,12 +755,14 @@ class BugCoupleModel(Model):


 class IssueModel(Model):
-    def __init__(self, lemmatization=False):
+    def __init__(self, owner, repo, lemmatization=False):
         Model.__init__(self, lemmatization)
-        self.training_dbs = [github.GITHUB_ISSUES_DB]
+
+        self.github = Github(owner=owner, repo=repo)
+        self.training_dbs = [self.github.db_path]

     def items_gen(self, classes):
-        for issue in github.get_issues():
+        for issue in self.github.get_issues():
             issue_number = issue["number"]
             if issue_number not in classes:
                 continue
@@ -9,7 +9,7 @@ import xgboost
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline

-from bugbug import feature_cleanup, github, issue_features, utils
+from bugbug import feature_cleanup, issue_features, utils
 from bugbug.model import IssueModel

 logger = logging.getLogger(__name__)

@@ -17,7 +17,9 @@ logger = logging.getLogger(__name__)

 class NeedsDiagnosisModel(IssueModel):
     def __init__(self, lemmatization=False):
-        IssueModel.__init__(self, lemmatization)
+        IssueModel.__init__(
+            self, owner="webcompat", repo="web-bugs", lemmatization=lemmatization
+        )

         self.calculate_importance = False

@@ -59,10 +61,11 @@ class NeedsDiagnosisModel(IssueModel):
     def get_labels(self):
         classes = {}

-        for issue in github.get_issues():
+        for issue in self.github.get_issues():
             # Skip issues with empty title or body
             if issue["title"] is None or issue["body"] is None:
                 continue

             # Skip issues that are not moderated yet as they don't have a meaningful title or body
             if issue["title"] == "In the moderation queue.":
                 continue
@@ -14,7 +14,8 @@ import requests
 import zstandard
 from redis import Redis

-from bugbug import bugzilla, github, repository, test_scheduling
+from bugbug import bugzilla, repository, test_scheduling
+from bugbug.github import Github
 from bugbug.model import Model
 from bugbug.utils import get_hgmo_stack
 from bugbug_http.readthrough_cache import ReadthroughTTLCache

@@ -113,6 +114,8 @@ def classify_issue(
 ) -> str:
     from bugbug_http.app import JobInfo

+    github = Github(owner=owner, repo=repo)
+
     issue_ids_set = set(map(int, issue_nums))

     issues = {
@@ -249,11 +249,11 @@ tasks:
           - --retrieve-private

         artifacts:
-          public/github_issues.json.zst:
-            path: /data/github_issues.json.zst
+          public/github_webcompat_web-bugs_issues.json.zst:
+            path: /data/github_webcompat_web-bugs_issues.json.zst
             type: file
-          public/github_issues.json.version:
-            path: /data/github_issues.json.version
+          public/github_webcompat_web-bugs_issues.json.version:
+            path: /data/github_webcompat_web-bugs_issues.json.version
             type: file

         features:

@@ -263,8 +263,8 @@ tasks:
         routes:
           - notify.email.release-mgmt-analysis@mozilla.com.on-failed
          - notify.irc-channel.#bugbug.on-failed
-          - index.project.bugbug.data_github_issues.${version}
-          - index.project.bugbug.data_github_issues.latest
+          - index.project.bugbug.data_github_webcompat_web-bugs_issues.${version}
+          - index.project.bugbug.data_github_webcompat_web-bugs_issues.latest
         metadata:
           name: bugbug webcompat issues retrieval
           description: bugbug webcompat issues retrieval
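
Note how the new Taskcluster artifact names and index routes line up with the DB_URL template from bugbug/github.py above; a small sanity-check snippet, using values taken from this diff:

    DB_URL = (
        "https://community-tc.services.mozilla.com/api/index/v1/task/"
        "project.bugbug.data_github_{}_{}_issues.latest/artifacts/public/"
        "github_{}_{}_issues.json.zst"
    )

    # For webcompat/web-bugs this resolves to the new index route and artifact name:
    print(DB_URL.format("webcompat", "web-bugs", "webcompat", "web-bugs"))
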
@@ -7,7 +7,8 @@ from logging import INFO, basicConfig, getLogger
 import numpy as np
 import requests

-from bugbug import db, github
+from bugbug import db
+from bugbug.github import Github
 from bugbug.models import get_model_class
 from bugbug.utils import download_model

@@ -34,13 +35,17 @@ def classify_issues(
     model_class = get_model_class(model_name)
     model = model_class.load(model_file_name)

+    github = Github(
+        owner=owner, repo=repo, state="all", retrieve_events=retrieve_events
+    )
+
     if issue_number:
         issues = iter(
             [github.fetch_issue_by_number(owner, repo, issue_number, retrieve_events)]
         )
         assert issues, f"An issue with a number of {issue_number} was not found"
     else:
-        assert db.download(github.GITHUB_ISSUES_DB)
+        assert db.download(github.db_path)
         issues = github.get_issues()

     for issue in issues:
@@ -7,53 +7,66 @@ import argparse
 from logging import getLogger
 from typing import List, Tuple

-from bugbug import db, github
-from bugbug.github import IssueDict
+from bugbug import db
+from bugbug.github import Github, IssueDict
 from bugbug.utils import extract_private, zstd_compress

 logger = getLogger(__name__)


-def replace_with_private(original_data: List[IssueDict]) -> Tuple[List[IssueDict], set]:
-    """Replace title and body of automatically closed public issues.
-
-    Replace them with title and body of a corresponding private issue
-    to account for moderation workflow in webcompat repository
-    """
-    updated_ids = set()
-    updated_issues = []
-    for item in original_data:
-        if item["title"] == "Issue closed.":
-            extracted = extract_private(item["body"])
-            if extracted is None:
-                continue
-
-            owner, repo, issue_number = extracted
-            private_issue = github.fetch_issue_by_number(owner, repo, issue_number)
-            if private_issue:
-                item["title"] = private_issue["title"]
-                item["body"] = private_issue["body"]
-                updated_ids.add(item["id"])
-                updated_issues.append(item)
-
-    return updated_issues, updated_ids
-
-
 class Retriever(object):
-    def retrieve_issues(
+    def __init__(
         self,
         owner: str,
         repo: str,
         state: str,
         retrieve_events: bool,
         retrieve_private: bool,
-    ) -> None:
+    ):
+        self.owner = owner
+        self.repo = repo
+        self.state = state
+        self.retrieve_events = retrieve_events
+        self.retrieve_private = retrieve_private
+        self.github = Github(
+            owner=owner, repo=repo, state=state, retrieve_events=retrieve_events
+        )
+
+    def replace_with_private(
+        self, original_data: List[IssueDict]
+    ) -> Tuple[List[IssueDict], set]:
+        """Replace title and body of automatically closed public issues.
+
+        Replace them with title and body of a corresponding private issue
+        to account for moderation workflow in webcompat repository
+        """
+        updated_ids = set()
+        updated_issues = []
+        for item in original_data:
+            if item["title"] == "Issue closed.":
+                extracted = extract_private(item["body"])
+                if extracted is None:
+                    continue
+
+                owner, repo, issue_number = extracted
+                private_issue = self.github.fetch_issue_by_number(
+                    owner, repo, issue_number
+                )
+                if private_issue:
+                    item["title"] = private_issue["title"]
+                    item["body"] = private_issue["body"]
+                    updated_ids.add(item["id"])
+                    updated_issues.append(item)
+
+        return updated_issues, updated_ids
+
+    def retrieve_issues(self) -> None:
         last_modified = None
-        db.download(github.GITHUB_ISSUES_DB)
+        db.download(self.github.db_path)

         try:
-            last_modified = db.last_modified(github.GITHUB_ISSUES_DB)
+            last_modified = db.last_modified(self.github.db_path)
         except Exception:
             pass

@@ -61,44 +74,44 @@ class Retriever(object):
             logger.info(
                 f"Retrieving issues modified or created since the last run on {last_modified.isoformat()}"
             )
-            data = github.fetch_issues_updated_since_timestamp(
-                owner, repo, state, last_modified.isoformat(), retrieve_events
-            )
+            data = self.github.fetch_issues_updated_since_timestamp(
+                last_modified.isoformat()
+            )

-            if retrieve_private:
+            if self.retrieve_private:
                 logger.info(
                     "Replacing contents of auto closed public issues with private issues content"
                 )
-                replace_with_private(data)
+                self.replace_with_private(data)

             updated_ids = set(issue["id"] for issue in data)

             logger.info(
                 "Deleting issues that were changed since the last run and saving updates"
             )
-            github.delete_issues(lambda issue: issue["id"] in updated_ids)
+            self.github.delete_issues(lambda issue: issue["id"] in updated_ids)

-            db.append(github.GITHUB_ISSUES_DB, data)
+            db.append(self.github.db_path, data)
             logger.info("Updating finished")
         else:
             logger.info("Retrieving all issues since last_modified is not available")
-            github.download_issues(owner, repo, state, retrieve_events)
+            self.github.download_issues()

-            if retrieve_private:
+            if self.retrieve_private:
                 logger.info(
                     "Replacing contents of auto closed public issues with private issues content"
                 )
-                all_issues = list(github.get_issues())
-                updated_issues, updated_ids = replace_with_private(all_issues)
+                all_issues = list(self.github.get_issues())
+                updated_issues, updated_ids = self.replace_with_private(all_issues)

                 logger.info(
                     "Deleting public issues that were updated and saving updates"
                 )
-                github.delete_issues(lambda issue: issue["id"] in updated_ids)
-                db.append(github.GITHUB_ISSUES_DB, updated_issues)
+                self.github.delete_issues(lambda issue: issue["id"] in updated_ids)
+                db.append(self.github.db_path, updated_issues)

-        zstd_compress(github.GITHUB_ISSUES_DB)
+        zstd_compress(self.github.db_path)


 def main() -> None:

@@ -136,10 +149,10 @@ def main() -> None:
     # Parse args to show the help if `--help` is passed
     args = parser.parse_args()

-    retriever = Retriever()
-    retriever.retrieve_issues(
+    retriever = Retriever(
         args.owner, args.repo, args.state, args.retrieve_events, args.retrieve_private
     )
+    retriever.retrieve_issues()


 if __name__ == "__main__":
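
A hedged sketch of how the reworked Retriever is now driven (the constructor call mirrors the test setup later in this diff; the argument order follows the new __init__):

    from scripts.github_issue_retriever import Retriever

    # owner, repo, state, retrieve_events, retrieve_private
    retriever = Retriever("webcompat", "web-bugs", "all", False, True)
    retriever.retrieve_issues()  # incremental update if the DB exists, full download otherwise
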
@@ -9,7 +9,7 @@ import shutil
 import pytest
 import zstandard

-from bugbug import bugzilla, github, repository
+from bugbug import bugzilla, repository

 FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")

@@ -21,7 +21,7 @@ def mock_data(tmp_path):
     DBs = [
         os.path.basename(bugzilla.BUGS_DB),
         os.path.basename(repository.COMMITS_DB),
-        os.path.basename(github.GITHUB_ISSUES_DB),
+        os.path.basename("data/github_webcompat_web-bugs_issues.json"),
     ]

     for f in DBs:
@@ -7,9 +7,10 @@ from unittest import mock

 import responses

-from bugbug import github
+from bugbug.github import Github

-github.get_token = mock.Mock(return_value="mocked_token")
+github = Github(owner="webcompat", repo="web-bugs")
+github.get_token = mock.Mock(return_value="mocked_token")  # type: ignore

 TEST_URL = "https://api.github.com/repos/webcompat/web-bugs/issues"
 TEST_EVENTS_URL = "https://api.github.com/repos/webcompat/web-bugs/issues/1/events"

@@ -84,10 +85,35 @@ def test_download_issues() -> None:
         responses.GET, "https://api.github.com/test&page=3", json=expected, status=200
     )

-    github.download_issues("webcompat", "web-bugs", "all")
+    github.download_issues()
+
+
+def test_download_issues_with_events() -> None:
+    github.retrieve_events = True
+    expected = [{"issue_id": "1", "events_url": TEST_EVENTS_URL}]
+    expected_events = [{"event_id": "1"}]
+    next_url_headers = {"link": "<https://api.github.com/test&page=3>; rel='next'"}
+
+    # Make sure required requests are made as long as next link is present in the header
+    responses.add(responses.GET, TEST_URL, json=expected, status=200, headers=HEADERS)
+    responses.add(
+        responses.GET,
+        "https://api.github.com/test&page=2",
+        json=expected,
+        status=200,
+        headers=next_url_headers,
+    )
+    responses.add(
+        responses.GET, "https://api.github.com/test&page=3", json=expected, status=200
+    )
+    # Mock events request
+    responses.add(responses.GET, TEST_EVENTS_URL, json=expected_events, status=200)
+
+    github.download_issues()


 def test_download_issues_updated_since_timestamp() -> None:
+    github.retrieve_events = False
     first_page = [
         {"id": 30515129, "issue_id": "1"},
         {"id": 30536238, "issue_id": "2"},

@@ -135,14 +161,13 @@ def test_download_issues_updated_since_timestamp() -> None:

     result = first_page + second_page + third_page

-    data = github.fetch_issues_updated_since_timestamp(
-        "webcompat", "web-bugs", "all", "2021-04-03T20:14:04+00:00"
-    )
+    data = github.fetch_issues_updated_since_timestamp("2021-04-03T20:14:04+00:00")

     assert data == result


 def test_fetch_issue_by_number() -> None:
+    github.retrieve_events = False
     expected = [
         {"issue_id": "1", "events_url": TEST_EVENTS_URL, "labels": [{"name": "test"}]}
     ]
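
The tests above stub the paginated GitHub requests with the `responses` library, advertising the next page through a mocked `Link` header. A minimal self-contained sketch of the same technique, with illustrative URLs:

    import requests
    import responses

    @responses.activate
    def test_follows_next_link() -> None:
        # First page advertises a next page via the Link header ...
        responses.add(
            responses.GET,
            "https://api.example.com/issues",
            json=[{"id": 1}],
            status=200,
            headers={"link": '<https://api.example.com/issues?page=2>; rel="next"'},
        )
        # ... the second page does not, which ends the pagination loop.
        responses.add(
            responses.GET,
            "https://api.example.com/issues?page=2",
            json=[{"id": 2}],
            status=200,
        )

        first = requests.get("https://api.example.com/issues")
        second = requests.get(first.links["next"]["url"])
        assert first.json() + second.json() == [{"id": 1}, {"id": 2}]
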
@@ -7,11 +7,11 @@ from unittest import mock

 import responses

-from bugbug import github
 from bugbug.github import IssueDict
-from scripts import github_issue_retriever
+from scripts.github_issue_retriever import Retriever

-github.get_token = mock.Mock(return_value="mocked_token")
+github_issue_retriever = Retriever("webcompat", "web-bugs", "all", False, True)
+github_issue_retriever.github.get_token = mock.Mock(return_value="mocked_token")  # type: ignore

 PUBLIC_BODY = """
 <p>Thanks for the report. We have closed this issue\n