Mirror of https://github.com/mozilla/bugbug.git

Add support for GitHub issue classification to the HTTP service (#2330)

Parent: 7e58f6d1db
Commit: fe6f06c704

@@ -30,6 +30,7 @@ services:
     image: mozilla/bugbug-http-service
     environment:
       - BUGBUG_BUGZILLA_TOKEN
+      - BUGBUG_GITHUB_TOKEN
       - PORT=8000
     ports:
       - target: 8000

@@ -44,6 +45,7 @@ services:
     image: mozilla/bugbug-http-service-bg-worker
     environment:
       - BUGBUG_BUGZILLA_TOKEN
+      - BUGBUG_GITHUB_TOKEN

   bugbug-spawn-pipeline:
     build:

@@ -0,0 +1,23 @@
+### Local development
+
+**To start the service locally, run the following commands.**
+
+Start Redis:
+
+    docker-compose up redis
+
+Build the http service image:
+
+    docker build -t mozilla/bugbug-http-service -f Dockerfile .
+
+Start the http service:
+
+    docker-compose up bugbug-http-service
+
+Build the background worker image:
+
+    docker build -t mozilla/bugbug-http-service-bg-worker --build-arg TAG=latest -f Dockerfile.bg_worker .
+
+Run the background worker:
+
+    docker-compose up bugbug-http-service-bg-worker
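
With Redis, the http service, and the background worker up, the new endpoint can be smoke-tested from Python. A minimal sketch, assuming the service listens on localhost:8000, that `test` passes the API-token check in local development, and that the header name behind the `API_TOKEN` constant is `X-Api-Key` (all assumptions; see the tests at the bottom of this commit):

    import requests

    # The first request for an issue with no cached prediction returns 202 and
    # schedules a classification job for the background worker; repeating the
    # call returns 200 once the result has landed in Redis. Responses come back
    # compressed (the tests below decode them with retrieve_compressed_reponse).
    resp = requests.get(
        "http://localhost:8000/needsdiagnosis/predict/github/webcompat/web-bugs/12345",
        headers={"X-Api-Key": "test"},  # header name and value are assumptions
    )
    print(resp.status_code)  # expect 202 on the first call
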
@@ -32,6 +32,7 @@ from bugbug import get_bugbug_version, utils
 from bugbug_http.models import (
     MODELS_NAMES,
     classify_bug,
+    classify_issue,
     get_config_specific_groups,
     schedule_tests,
 )

@@ -80,6 +81,8 @@ q = Queue(
 VALIDATOR = Validator()

 BUGZILLA_TOKEN = os.environ.get("BUGBUG_BUGZILLA_TOKEN")
+GITHUB_TOKEN = os.environ.get("BUGBUG_GITHUB_TOKEN")
+
 BUGZILLA_API_URL = (
     libmozdata.config.get("Bugzilla", "URL", "https://bugzilla.mozilla.org")
     + "/rest/bug"

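
One operational note on these lookups: `os.environ.get` returns `None` for an unset variable, so a missing `BUGBUG_GITHUB_TOKEN` does not fail here; it only surfaces later as a malformed Authorization header in `get_github_issues_update_time`. A quick illustration (a sketch, not part of the commit):

    import os

    token = os.environ.get("BUGBUG_GITHUB_TOKEN")  # None when the variable is unset
    header = {"Authorization": "token {}".format(token)}
    # With the variable unset this builds {'Authorization': 'token None'},
    # which GitHub treats as bad credentials (401) rather than anonymous access.
    print(header)
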
@@ -212,6 +215,27 @@ def schedule_bug_classification(model_name: str, bug_ids: Sequence[int]) -> None
 )


+def schedule_issue_classification(
+    model_name: str, owner: str, repo: str, issue_nums: Sequence[int]
+) -> None:
+    """Schedule the classification of a issue_id list"""
+    job_id = get_job_id()
+
+    # Set the mapping before queuing to avoid some race conditions
+    job_id_mapping = {}
+    for issue_num in issue_nums:
+        key = JobInfo(classify_issue, model_name, owner, repo, issue_num).mapping_key
+        job_id_mapping[key] = job_id
+
+    redis_conn.mset(job_id_mapping)
+
+    schedule_job(
+        JobInfo(classify_issue, model_name, owner, repo, issue_nums),
+        job_id=job_id,
+        timeout=BUGZILLA_JOB_TIMEOUT,
+    )
+
+
 def is_pending(job):
     # Check if there is a job
     job_id = redis_conn.get(job.mapping_key)

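
For illustration, the HTTP route added further down calls this for a single issue; a direct invocation would look like the following (a hypothetical call, assuming the app module is importable and Redis is reachable):

    from bugbug_http.app import schedule_issue_classification

    # One mapping key per issue is written via mset() before the job is
    # enqueued, so a request arriving mid-schedule already sees the job as
    # pending instead of enqueueing a duplicate.
    schedule_issue_classification("needsdiagnosis", "webcompat", "web-bugs", [12345, 12346])
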
@@ -275,6 +299,23 @@ def get_bugs_last_change_time(bug_ids):
 return bugs


+def get_github_issues_update_time(
+    owner: str, repo: str, issue_nums: Sequence[int]
+) -> dict:
+    header = {"Authorization": "token {}".format(GITHUB_TOKEN)}
+    repo_url = f"https://api.github.com/repos/{owner}/{repo}/issues/"
+
+    issues = {}
+    for issue_num in issue_nums:
+        issue_url = repo_url + str(issue_num)
+        response = utils.get_session("github").get(issue_url, headers=header)
+        response.raise_for_status()
+        raw_issue = response.json()
+        issues[raw_issue["number"]] = raw_issue["updated_at"]
+
+    return issues
+
+
 def is_prediction_invalidated(job, change_time):
     # First get the saved change time
     saved_change_time = redis_conn.get(job.change_time_key)

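
The returned mapping is keyed by the `number` field of each GitHub issue payload, with the raw `updated_at` string as the value. A usage sketch, assuming `BUGBUG_GITHUB_TOKEN` is set and the issue exists (the timestamp shown is made up):

    from bugbug_http.app import get_github_issues_update_time

    update_times = get_github_issues_update_time("webcompat", "web-bugs", [12345])
    # GitHub's REST API reports updated_at as an ISO-8601 string, so this
    # prints something like {12345: "2021-02-19T08:00:00Z"}.
    print(update_times)
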
@@ -407,6 +448,84 @@ def model_prediction(model_name, bug_id):
     return compress_response(data, status_code)


+@application.route(
+    "/<model_name>/predict/github/<string:owner>/<string:repo>/<int:issue_num>"
+)
+@cross_origin()
+def model_prediction_github(model_name, owner, repo, issue_num):
+    """
+    ---
+    get:
+      description: Classify a single issue using given model, answer either 200 if the issue is processed or 202 if the issue is being processed
+      summary: Classify a single issue
+      parameters:
+      - name: model_name
+        in: path
+        schema: ModelName
+      - name: owner
+        in: path
+        schema:
+          type: str
+        example: webcompat
+      - name: repo
+        in: path
+        schema:
+          type: str
+        example: web-bugs
+      - name: issue_number
+        in: path
+        schema:
+          type: integer
+        example: 123456
+      responses:
+        200:
+          description: A single issue prediction
+          content:
+            application/json:
+              schema: BugPrediction
+        202:
+          description: A temporary answer for the issue being processed
+          content:
+            application/json:
+              schema: NotAvailableYet
+        401:
+          description: API key is missing
+          content:
+            application/json:
+              schema: UnauthorizedError
+    """
+    headers = request.headers
+
+    auth = headers.get(API_TOKEN)
+
+    if not auth:
+        return jsonify(UnauthorizedError().dump({})), 401
+    else:
+        LOGGER.info("Request with API TOKEN %r", auth)
+
+    if model_name not in MODELS_NAMES:
+        return jsonify({"error": f"Model {model_name} doesn't exist"}), 404
+
+    # Get the latest change date from github for the issue
+    update_time = get_github_issues_update_time(owner, repo, [issue_num])
+
+    job = JobInfo(classify_issue, model_name, owner, repo, issue_num)
+    issue_change_time = update_time.get(issue_num)
+    if issue_change_time and is_prediction_invalidated(job, update_time[issue_num]):
+        clean_prediction_cache(job)
+
+    status_code = 200
+    data = get_result(job)
+
+    if not data:
+        if not is_pending(job):
+            schedule_issue_classification(model_name, owner, repo, [issue_num])
+        status_code = 202
+        data = {"ready": False}
+
+    return compress_response(data, status_code)
+
+
 @application.route("/<model_name>/predict/batch", methods=["POST"])
 @cross_origin()
 def batch_prediction(model_name):

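
Putting the route's outcomes together: 401 without a token, 404 for an unknown model, 202 while the job is scheduled or still pending, and 200 once the prediction is cached. A hedged polling sketch against the same assumed local setup as above:

    import time

    import requests

    URL = "http://localhost:8000/needsdiagnosis/predict/github/webcompat/web-bugs/12345"

    while True:
        resp = requests.get(URL, headers={"X-Api-Key": "test"})
        resp.raise_for_status()  # surfaces 401/404 immediately
        if resp.status_code == 200:
            break  # prediction is ready
        time.sleep(5)  # 202: give the background worker time to classify
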
@@ -15,7 +15,7 @@ import zstandard
 from libmozdata.bugzilla import Bugzilla
 from redis import Redis

-from bugbug import bugzilla, repository, test_scheduling
+from bugbug import bugzilla, github, repository, test_scheduling
 from bugbug.model import Model
 from bugbug.utils import get_hgmo_stack
 from bugbug_http.readthrough_cache import ReadthroughTTLCache

@@ -26,6 +26,7 @@ LOGGER = logging.getLogger()
 MODELS_NAMES = [
     "defectenhancementtask",
     "component",
+    "needsdiagnosis",
     "regression",
     "stepstoreproduce",
     "spambug",

@@ -110,6 +111,64 @@ def classify_bug(model_name: str, bug_ids: Sequence[int], bugzilla_token: str) -
 return "OK"


+def classify_issue(
+    model_name: str, owner: str, repo: str, issue_nums: Sequence[int]
+) -> str:
+    from bugbug_http.app import JobInfo
+
+    issue_ids_set = set(map(int, issue_nums))
+
+    issues = {
+        issue_num: github.fetch_issue_by_number(owner, repo, issue_num, True)
+        for issue_num in issue_nums
+    }
+
+    missing_issues = issue_ids_set.difference(issues.keys())
+
+    for issue_id in missing_issues:
+        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)
+
+        # TODO: Find a better error format
+        setkey(job.result_key, orjson.dumps({"available": False}))
+
+    if not issues:
+        return "NOK"
+
+    model = MODEL_CACHE.get(model_name)
+
+    if not model:
+        LOGGER.info("Missing model %r, aborting" % model_name)
+        return "NOK"
+
+    model_extra_data = model.get_extra_data()
+
+    # TODO: Classify could choke on a single bug which could make the whole
+    # job to fail. What should we do here?
+    probs = model.classify(list(issues.values()), True)
+    indexes = probs.argmax(axis=-1)
+    suggestions = model.le.inverse_transform(indexes)
+
+    probs_list = probs.tolist()
+    indexes_list = indexes.tolist()
+    suggestions_list = suggestions.tolist()
+
+    for i, issue_id in enumerate(issues.keys()):
+        data = {
+            "prob": probs_list[i],
+            "index": indexes_list[i],
+            "class": suggestions_list[i],
+            "extra_data": model_extra_data,
+        }
+
+        job = JobInfo(classify_issue, model_name, owner, repo, issue_id)
+        setkey(job.result_key, orjson.dumps(data), compress=True)
+
+        # Save the bug last change
+        setkey(job.change_time_key, issues[issue_id]["updated_at"].encode())
+
+    return "OK"
+
+
 @lru_cache(maxsize=None)
 def get_known_tasks() -> Tuple[str, ...]:
     with open("known_tasks", "r") as f:

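
Each per-issue payload is serialized with orjson under the job's `result_key`, and the issue's `updated_at` under `change_time_key` (the value `is_prediction_invalidated` compares against). A minimal round-trip sketch with made-up values for a two-class model:

    import orjson

    # Mirrors the dict built in classify_issue above; prob/index/class are
    # illustrative values only.
    payload = {"prob": [0.12, 0.88], "index": 1, "class": 1, "extra_data": {}}
    blob = orjson.dumps(payload)          # what setkey() stores (optionally compressed)
    assert orjson.loads(blob) == payload  # what the route reads back via get_result()
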
@@ -6,6 +6,7 @@ services:
     image: mozilla/bugbug-http-service
     environment:
      - BUGBUG_BUGZILLA_TOKEN
+     - BUGBUG_GITHUB_TOKEN
      - REDIS_URL=redis://redis:6379/0
      - PORT=8000
      - PULSE_USER

@@ -26,6 +27,7 @@ services:
     image: mozilla/bugbug-http-service-bg-worker
     environment:
      - BUGBUG_BUGZILLA_TOKEN
+     - BUGBUG_GITHUB_TOKEN
      - REDIS_URL=redis://redis:6379/0
      - BUGBUG_ALLOW_MISSING_MODELS
      - BUGBUG_REPO_DIR

@@ -61,6 +61,47 @@ def test_model_predict_id(client, jobs, add_result, responses):
 assert retrieve_compressed_reponse(rv) == result


+def test_model_predict_id_github(client, jobs, add_result, responses):
+    issue_id = "12345"
+    result = {
+        "prob": [0.11845558881759644, 0.8815444111824036],
+        "index": 1,
+        "class": 1,
+        "extra_data": {},
+    }
+
+    responses.add(
+        responses.GET,
+        f"https://api.github.com/repos/webcompat/web-bugs/issues/{issue_id}",
+        status=200,
+        json={"number": issue_id, "updated_at": time.time()},
+    )
+
+    def do_request():
+        return client.get(
+            "/needsdiagnosis/predict/github/webcompat/web-bugs/12345",
+            headers={API_TOKEN: "test"},
+        )
+
+    rv = do_request()
+    assert rv.status_code == 202
+    assert retrieve_compressed_reponse(rv) == {"ready": False}
+
+    # request still not ready
+    rv = do_request()
+    assert rv.status_code == 202
+    assert retrieve_compressed_reponse(rv) == {"ready": False}
+    assert len(jobs) == 1
+
+    # now it's ready
+    keys = next(iter(jobs.values()))
+    add_result(keys[0], result)
+
+    rv = do_request()
+    assert rv.status_code == 200
+    assert retrieve_compressed_reponse(rv) == result
+
+
 def test_model_predict_batch(client, jobs, add_result, add_change_time, responses):
     bug_ids = [123, 456]
     result = {