diff --git a/.isort.cfg b/.isort.cfg index f1bc66fc..53df42c0 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -4,4 +4,4 @@ include_trailing_comma=True force_grid_wrap=0 use_parentheses=True line_length=88 -known_third_party = dateutil,flask,hglib,imblearn,jsone,jsonschema,libmozdata,matplotlib,microannotate,models,numpy,pandas,pkg_resources,pyemd,pytest,redis,requests,responses,rq,setuptools,shap,sklearn,tabulate,taskcluster,tqdm,xgboost,yaml,zstandard \ No newline at end of file +known_third_party = apispec,apispec_webframeworks,dateutil,flask,flask_cors,hglib,imblearn,jsone,jsonschema,libmozdata,marshmallow,matplotlib,microannotate,models,numpy,pandas,pkg_resources,pyemd,pytest,redis,requests,responses,rq,setuptools,shap,sklearn,tabulate,taskcluster,tqdm,xgboost,yaml,zstandard diff --git a/http_service/.dockerignore b/http_service/.dockerignore index 5a74a939..b90c152d 100644 --- a/http_service/.dockerignore +++ b/http_service/.dockerignore @@ -96,5 +96,6 @@ venv/ # Project-specific stuff cache/ +**/cache/ data/ http_service/ diff --git a/http_service/app.py b/http_service/app.py index 36af257f..8d143886 100644 --- a/http_service/app.py +++ b/http_service/app.py @@ -8,16 +8,42 @@ import logging import os import uuid -from flask import Flask, jsonify, request +from apispec import APISpec +from apispec.ext.marshmallow import MarshmallowPlugin +from apispec_webframeworks.flask import FlaskPlugin +from flask import Flask, jsonify, render_template, request +from flask_cors import cross_origin +from marshmallow import Schema, fields from redis import Redis from rq import Queue from rq.exceptions import NoSuchJobError from rq.job import Job -from .models import classify_bug +from bugbug import get_bugbug_version + +from .models import MODELS_NAMES, classify_bug API_TOKEN = "X-Api-Key" +API_DESCRIPTION = """ +This is the documentation for the BugBug http service, the platform for Bugzilla Machine Learning projects. 
+ +# Introduction + +This service can be used to classify a given bug using a pre-trained model. +You can classify a single bug or a batch of bugs. +The classification happens in the background so you need to call back the service for getting the results. +""" + +spec = APISpec( + title="Bugbug", + version=get_bugbug_version(), + openapi_version="3.0.2", + info=dict(description=API_DESCRIPTION), + plugins=[FlaskPlugin(), MarshmallowPlugin()], + security=[{"api_key": []}], +) + application = Flask(__name__) redis_url = os.environ.get("REDIS_URL", "redis://localhost/0") redis_conn = Redis.from_url(redis_url) @@ -29,6 +55,37 @@ logging.basicConfig(level=logging.INFO) LOGGER = logging.getLogger() +class BugPrediction(Schema): + prob = fields.List(fields.Float()) + index = fields.Integer() + suggestion = fields.Str() + extra_data = fields.Dict() + + +class BugPredictionNotAvailableYet(Schema): + ready = fields.Boolean(enum=[False]) + + +class ModelName(Schema): + model_name = fields.Str(enum=MODELS_NAMES, example="component") + + +class UnauthorizedError(Schema): + message = fields.Str(default="Error, missing X-API-KEY") + + +spec.components.schema(BugPrediction.__name__, schema=BugPrediction) +spec.components.schema( + BugPredictionNotAvailableYet.__name__, schema=BugPredictionNotAvailableYet +) +spec.components.schema(ModelName.__name__, schema=ModelName) +spec.components.schema(UnauthorizedError.__name__, schema=UnauthorizedError) + + +api_key_scheme = {"type": "apiKey", "in": "header", "name": "X-API-Key"} +spec.components.security_scheme("api_key", api_key_scheme) + + def get_job_id(): return uuid.uuid4().hex @@ -85,15 +142,52 @@ def get_bug_classification(model_name, bug_id): return None -@application.route("//predict/") +@application.route("//predict/") +@cross_origin() def model_prediction(model_name, bug_id): + """ + --- + get: + description: Classify a single bug using given model, answer either 200 if the bug is processed or 202 if the bug is being processed 
+ summary: Classify a single bug + parameters: + - name: model_name + in: path + schema: ModelName + - name: bug_id + in: path + schema: + type: integer + example: 123456 + responses: + 200: + description: A single bug prediction + content: + application/json: + schema: BugPrediction + 202: + description: A temporary answer for the bug being processed + content: + application/json: + schema: + type: object + properties: + ready: + type: boolean + enum: [False] + 401: + description: API key is missing + content: + application/json: + schema: UnauthorizedError + """ headers = request.headers redis_conn.ping() auth = headers.get(API_TOKEN) if not auth: - return jsonify({"message": "Error, missing X-API-KEY"}), 401 + return jsonify(UnauthorizedError().dump({}).data), 401 else: LOGGER.info("Request with API TOKEN %r", auth) @@ -110,13 +204,125 @@ def model_prediction(model_name, bug_id): @application.route("//predict/batch", methods=["POST"]) +@cross_origin() def batch_prediction(model_name): + """ + --- + post: + description: > + Post a batch of bug ids to classify, answer either 200 if all bugs are + processed or 202 if at least one bug is not processed. +

+ Start by sending a batch of bug ids like this:
+ ``` + {"bugs": [123, 456]} + ```

+ + You will likely get a 202 answer that indicates that no result is + available yet for any of the bug ids you provided with the following + body:
+ + ``` + {"bugs": {"123": {"ready": False}, "456": {"ready": False}}} + ```

+ + Call back the same endpoint with the same bug ids a bit later, and you + will get the results.

+ + You might get the following output if some bugs are not available: +
+ + ``` + {"bugs": {"123": {"available": False}}} + ```

+ + And you will get the following output once the bugs are available: +
+ ``` + {"bugs": {"456": {"extra_data": {}, "index": 0, "prob": [0], "suggestion": ""}}} + ```

+ + Please be aware that each bug could be in a different state, so the + following output, where a bug is returned and another one is still + being processed, is valid: +
+ ``` + {"bugs": {"123": {"available": False}, "456": {"extra_data": {}, "index": 0, "prob": [0], "suggestion": ""}}} + ``` + summary: Classify a batch of bugs + parameters: + - name: model_name + in: path + schema: ModelName + requestBody: + description: The list of bugs to classify + content: + application/json: + schema: + type: object + properties: + bugs: + type: array + items: + type: integer + examples: + cat: + summary: An example of payload + value: + bugs: + [123456, 789012] + responses: + 200: + description: A list of results + content: + application/json: + schema: + type: object + additionalProperties: true + example: + bugs: + 123456: + extra_data: {} + index: 0 + prob: [0] + suggestion: string + 789012: + extra_data: {} + index: 0 + prob: [0] + suggestion: string + 202: + description: A temporary answer for bugs being processed + content: + application/json: + schema: + type: object + items: + type: object + properties: + ready: + type: boolean + enum: [False] + example: + bugs: + 123456: + extra_data: {} + index: 0 + prob: [0] + suggestion: string + 789012: {ready: False} + 401: + description: API key is missing + content: + application/json: + schema: UnauthorizedError + """ headers = request.headers auth = headers.get(API_TOKEN) if not auth: - return jsonify({"message": "Error, missing X-API-KEY"}), 401 + return jsonify(UnauthorizedError().dump({}).data), 401 else: LOGGER.info("Request with API TOKEN %r", auth) @@ -144,4 +350,21 @@ def batch_prediction(model_name): # not like getting 1 million bug at a time schedule_bug_classification(model_name, missing_bugs) - return jsonify(**data), status_code + return jsonify({"bugs": data}), status_code + + +@application.route("/swagger") +@cross_origin() +def swagger(): + for name, rule in application.view_functions.items(): + # Ignore static endpoint as it isn't documented with OpenAPI + if name == "static": + continue + spec.path(view=rule) + + return jsonify(spec.to_dict()) + + 
+@application.route("/doc") +def doc(): + return render_template("doc.html") diff --git a/http_service/requirements.txt b/http_service/requirements.txt index 7c95da6a..3fab5deb 100644 --- a/http_service/requirements.txt +++ b/http_service/requirements.txt @@ -1,4 +1,9 @@ +apispec-webframeworks==0.4.0 +apispec[yaml]==2.0.0 +flask-apispec==0.8.0 +flask-cors Flask==1.0.3 gunicorn==19.9.0 -rq==1.0 +marshmallow==2.19.5 rq-dashboard==0.5.1 +rq==1.0 \ No newline at end of file diff --git a/http_service/templates/doc.html b/http_service/templates/doc.html new file mode 100644 index 00000000..7dac3334 --- /dev/null +++ b/http_service/templates/doc.html @@ -0,0 +1,23 @@ + + + + BugBug documentation + + + + + + + + + + + + \ No newline at end of file