Mirror of https://github.com/mozilla/bugbug.git
Parent 9da5a4faa5
Commit e4db6b219f
@@ -3,4 +3,4 @@ multi_line_output=3
 include_trailing_comma=True
 line_length=88
 known_first_party = bugbug,bugbug_http
-known_third_party = _pytest,apispec,apispec_webframeworks,boto3,cerberus,dateutil,flask,flask_cors,hglib,hypothesis,igraph,imblearn,jsone,jsonschema,kombu,libmozdata,lmdb,marshmallow,matplotlib,microannotate,mozci,numpy,orjson,ortools,pandas,pkg_resources,psutil,py,pyemd,pytest,redis,requests,responses,rq,rs_parsepatch,scipy,sentry_sdk,setuptools,shap,sklearn,tabulate,taskcluster,tenacity,tqdm,xgboost,yaml,zstandard
+known_third_party = _pytest,apispec,apispec_webframeworks,boto3,cerberus,dateutil,flask,flask_cors,hglib,hypothesis,igraph,imblearn,jsone,jsonschema,kombu,libmozdata,lmdb,marshmallow,matplotlib,microannotate,mozci,numpy,orjson,ortools,pandas,pkg_resources,psutil,py,pyemd,pytest,ratelimit,redis,requests,responses,rq,rs_parsepatch,scipy,sentry_sdk,setuptools,shap,sklearn,tabulate,taskcluster,tenacity,tqdm,xgboost,yaml,zstandard
@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import logging
from typing import Iterator, List, NewType, Tuple

import requests
from ratelimit import limits, sleep_and_retry

from bugbug import db
from bugbug.utils import get_secret

logger = logging.getLogger(__name__)

GITHUB_ISSUES_DB = "data/github_issues.json"
db.register(
    GITHUB_ISSUES_DB,
    "https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.data_github_issues.latest/artifacts/public/github_issues.json.zst",
    1,
)

IssueDict = NewType("IssueDict", dict)

PER_PAGE = 100
# Rate limit period in seconds
RATE_LIMIT_PERIOD = 900


def get_issues() -> Iterator[IssueDict]:
    yield from db.read(GITHUB_ISSUES_DB)


@sleep_and_retry
@limits(calls=1200, period=RATE_LIMIT_PERIOD)
def api_limit():
    # Allow a limited number of requests to account for rate limiting
    pass


def get_token() -> str:
    return get_secret("GITHUB_TOKEN")


def fetch_events(events_url: str) -> list:
    api_limit()
    logger.info(f"Fetching {events_url}")
    headers = {"Authorization": "token {}".format(get_token())}
    response = requests.get(events_url, headers=headers)
    response.raise_for_status()
    events_raw = response.json()
    return events_raw


def fetch_issues(
    url: str, retrieve_events: bool, params: dict = None
) -> Tuple[List[IssueDict], dict]:
    api_limit()
    headers = {"Authorization": "token {}".format(get_token())}
    response = requests.get(url, params=params, headers=headers)
    response.raise_for_status()
    data = response.json()

    logger.info(f"Fetching {url}")

    if retrieve_events:
        for item in data:
            events = fetch_events(item["events_url"])
            item.update({"events": events})

    return data, response.links


def get_start_page() -> int:
    # Determine next page to fetch based on number of downloaded issues
    issues = get_issues()
    count = sum(1 for _ in issues)
    return int(count / PER_PAGE) + 1


def download_issues(
    owner: str, repo: str, state: str, retrieve_events: bool = False
) -> None:
    url = "https://api.github.com/repos/{}/{}/issues".format(owner, repo)
    start_page = get_start_page()

    params = {
        "state": state,
        "sort": "created",
        "direction": "asc",
        "per_page": PER_PAGE,
        "page": start_page,
    }

    data, response_links = fetch_issues(
        url=url, retrieve_events=retrieve_events, params=params
    )

    db.append(GITHUB_ISSUES_DB, data)
    # Fetch next page
    while "next" in response_links.keys():
        next_page_data, response_links = fetch_issues(
            response_links["next"]["url"], retrieve_events
        )
        db.append(GITHUB_ISSUES_DB, next_page_data)

    logger.info("Done fetching")
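The module above throttles itself to 1200 calls per 900-second window (4800 requests per hour), which stays under GitHub's documented limit of 5000 requests per hour for authenticated REST clients. As a rough sketch of how the module is meant to be driven (illustration only, not part of this commit; it assumes the GITHUB_TOKEN secret is configured so that get_secret("GITHUB_TOKEN") succeeds, and that the registered DB has been downloaded first, as the retriever script below does via db.download):

# Illustrative sketch, not part of this commit.
from bugbug import db, github

# Make sure the local github_issues DB exists before appending to it.
db.download(github.GITHUB_ISSUES_DB)

# Fetch all issues (and their event streams) of the example repository used
# in the tests, resuming from the page implied by the number of stored issues.
github.download_issues("webcompat", "web-bugs", state="all", retrieve_events=True)

# Iterate over the locally stored issues.
count = sum(1 for _ in github.get_issues())
print(f"{count} issues stored in {github.GITHUB_ISSUES_DB}")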
@@ -16,6 +16,7 @@ pydriller==1.12
 pyOpenSSL>=0.14 # Could not find a version that satisfies the requirement pyOpenSSL>=0.14; extra == "security" (from requests[security]>=2.7.0->libmozdata==0.1.43)
 python-dateutil==2.8.1
 python-hglib==2.6.2
+ratelimit==2.2.1
 requests==2.25.1
 rs_parsepatch==0.3.3
 scikit-learn==0.24.1
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import argparse

from bugbug import db, github
from bugbug.utils import zstd_compress


class Retriever(object):
    def retrieve_issues(
        self, owner: str, repo: str, state: str, retrieve_events: bool
    ) -> None:
        db.download(github.GITHUB_ISSUES_DB)
        github.download_issues(owner, repo, state, retrieve_events)
        zstd_compress(github.GITHUB_ISSUES_DB)


def main() -> None:
    description = "Retrieve GitHub issues"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--owner",
        help="GitHub repository owner.",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--repo",
        help="GitHub repository name.",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--state",
        type=str,
        default="all",
        help="Indicates the state of the issues to return. Can be either open, closed, or all",
    )
    parser.add_argument(
        "--retrieve-events",
        action="store_true",
        help="Whether to retrieve events for each issue.",
    )

    # Parse args to show the help if `--help` is passed
    args = parser.parse_args()

    retriever = Retriever()
    retriever.retrieve_issues(args.owner, args.repo, args.state, args.retrieve_events)


if __name__ == "__main__":
    main()
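For illustration only (not part of the commit), the same retrieval can also be triggered from Python instead of the command line; the example arguments mirror the repository used in the tests:

# Illustration only: programmatic use of the Retriever class defined above.
# Requires network access and a valid GITHUB_TOKEN secret, exactly like the
# command-line entry point.
from scripts.github_issue_retriever import Retriever

Retriever().retrieve_issues(
    owner="webcompat", repo="web-bugs", state="all", retrieve_events=True
)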
setup.py
@@ -61,6 +61,7 @@ setup(
             "bugbug-testing-policy-stats = scripts.testing_policy_stats:main",
             "bugbug-generate-landings-risk-report = scripts.generate_landings_risk_report:main",
             "bugbug-shadow-scheduler-stats = scripts.shadow_scheduler_stats:main",
+            "bugbug-data-github = scripts.github_issue_retriever:main",
         ]
     },
     classifiers=[
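With this entry point, installing the package exposes a bugbug-data-github console command that runs scripts.github_issue_retriever:main, so the retriever can be invoked as, for example, bugbug-data-github --owner webcompat --repo web-bugs --state all (example arguments mirroring the repository used in the tests below).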
@@ -9,7 +9,7 @@ import shutil
 import pytest
 import zstandard
 
-from bugbug import bugzilla, repository
+from bugbug import bugzilla, github, repository
 
 FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
 
@@ -18,7 +18,11 @@ FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures")
 def mock_data(tmp_path):
     os.mkdir(tmp_path / "data")
 
-    DBs = [os.path.basename(bugzilla.BUGS_DB), os.path.basename(repository.COMMITS_DB)]
+    DBs = [
+        os.path.basename(bugzilla.BUGS_DB),
+        os.path.basename(repository.COMMITS_DB),
+        os.path.basename(github.GITHUB_ISSUES_DB),
+    ]
 
     for f in DBs:
         shutil.copyfile(os.path.join(FIXTURES_DIR, f), tmp_path / "data" / f)
File diffs are hidden because one or more lines are too long.
@@ -0,0 +1,84 @@
# -*- coding: utf-8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

from unittest import mock

import responses

from bugbug import github

github.get_token = mock.Mock(return_value="mocked_token")

TEST_URL = "https://api.github.com/repos/webcompat/web-bugs/issues"
TEST_EVENTS_URL = "https://api.github.com/repos/webcompat/web-bugs/issues/1/events"
HEADERS = {"link": "<https://api.github.com/test&page=2>; rel='next'"}


def test_get_start_page():
    assert github.get_start_page() == 2


def test_fetch_issues():
    expected = [{"issue_id": "1", "events_url": TEST_EVENTS_URL}]
    expected_headers = {
        "next": {"url": "https://api.github.com/test&page=2", "rel": "next"}
    }

    # Mock main request
    responses.add(responses.GET, TEST_URL, json=expected, status=200, headers=HEADERS)

    # Assert that response without events has expected format
    response = github.fetch_issues(TEST_URL, False)
    assert response == (expected, expected_headers)


def test_fetch_issues_with_events():
    expected = [{"issue_id": "1", "events_url": TEST_EVENTS_URL}]
    expected_events = [{"event_id": "1"}]
    expected_headers = {
        "next": {"url": "https://api.github.com/test&page=2", "rel": "next"}
    }

    # Mock main request
    responses.add(responses.GET, TEST_URL, json=expected, status=200, headers=HEADERS)
    # Mock events request
    responses.add(responses.GET, TEST_EVENTS_URL, json=expected_events, status=200)

    # Assert that response with events has expected format
    response_with_events = github.fetch_issues(TEST_URL, True)
    expected_with_events = expected
    expected_with_events[0]["events"] = expected_events

    assert response_with_events == (expected_with_events, expected_headers)


def test_fetch_issues_empty_header():
    expected = [{"issue_id": "1", "events_url": TEST_EVENTS_URL}]

    # Mock main request with no headers
    responses.add(responses.GET, TEST_URL, json=expected, status=200)
    response_no_headers = github.fetch_issues(TEST_URL, False)

    assert response_no_headers == (expected, {})


def test_download_issues():
    expected = [{"issue_id": "1", "events_url": TEST_EVENTS_URL}]
    next_url_headers = {"link": "<https://api.github.com/test&page=3>; rel='next'"}

    # Make sure required requests are made as long as next link is present in the header
    responses.add(responses.GET, TEST_URL, json=expected, status=200, headers=HEADERS)
    responses.add(
        responses.GET,
        "https://api.github.com/test&page=2",
        json=expected,
        status=200,
        headers=next_url_headers,
    )
    responses.add(
        responses.GET, "https://api.github.com/test&page=3", json=expected, status=200
    )

    github.download_issues("webcompat", "web-bugs", "all")
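A note on the expected_headers values asserted above: fetch_issues returns response.links, which is requests' parsed form of the Link response header, so the raw header stored in HEADERS maps to a dict keyed by link relation. A minimal sketch of that parsing using requests' own helper (illustration only, not part of the commit):

# Illustration only: how requests turns the Link header used in HEADERS into
# the dict that github.fetch_issues returns as its second element.
from requests.utils import parse_header_links

links = parse_header_links("<https://api.github.com/test&page=2>; rel='next'")
print(links)  # [{'url': 'https://api.github.com/test&page=2', 'rel': 'next'}]
# requests.Response.links keys these entries by "rel", giving:
# {'next': {'url': 'https://api.github.com/test&page=2', 'rel': 'next'}}

Also note that responses.add only intercepts outgoing requests while the responses mock is active, for example under the @responses.activate decorator or between responses.start() and responses.stop().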