Fixes #135 - Add a job to fetch Webcompat Knowledge Base related bugs and store them in BQ (#136)

* Fixes #135 - Add a job to fetch Webcompat Knowledge Base related bugs from Bugzilla and store them in BQ

* Fixes #135 - Use a specific flake8 version
This commit is contained in:
Ksenia 2023-08-17 15:10:07 -04:00 committed by GitHub
Parent 9f7333e2da
Commit e11acc964d
No key matching this signature was found
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 918 additions and 0 deletions


@@ -279,6 +279,22 @@ jobs:
command: docker run app:build pytest
build-job-webcompat-kb:
docker:
- image: << pipeline.parameters.git-image >>
steps:
- checkout
- compare-branch:
pattern: ^jobs/webcompat-kb/
- setup_remote_docker:
version: << pipeline.parameters.docker-version >>
- run:
name: Build Docker image
command: docker build -t app:build jobs/webcompat-kb/
- run:
name: Test Code
command: docker run app:build pytest --flake8 --black
workflows:
docker-etl:
jobs:
@@ -475,3 +491,17 @@ workflows:
branches:
only: main
job-webcompat-kb:
jobs:
- build-job-webcompat-kb
- gcp-gcr/build-and-push-image:
context: data-eng-airflow-gcr
docker-context: jobs/webcompat-kb/
path: jobs/webcompat-kb/
image: webcompat-kb_docker_etl
requires:
- build-job-webcompat-kb
filters:
branches:
only: main


@@ -0,0 +1,7 @@
.ci_job.yaml
.ci_workflow.yaml
.DS_Store
*.pyc
.pytest_cache/
__pycache__/
venv/


@@ -0,0 +1,2 @@
[flake8]
max-line-length = 88

jobs/webcompat-kb/.gitignore vendored Normal file

@@ -0,0 +1,4 @@
.DS_Store
*.pyc
__pycache__/
venv/


@@ -0,0 +1,26 @@
FROM python:3.8
MAINTAINER <kberezina@mozilla.com>
# https://github.com/mozilla-services/Dockerflow/blob/master/docs/building-container.md
ARG USER_ID="10001"
ARG GROUP_ID="app"
ARG HOME="/app"
ENV HOME=${HOME}
RUN groupadd --gid ${USER_ID} ${GROUP_ID} && \
useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} --home-dir ${HOME} ${GROUP_ID}
WORKDIR ${HOME}
RUN pip install --upgrade pip
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY . .
RUN pip install .
# Drop root and change ownership of the application folder to the user
RUN chown -R ${USER_ID}:${GROUP_ID} ${HOME}
USER ${USER_ID}


@@ -0,0 +1,47 @@
# Web Compatibility Knowledge Base bugs import from Bugzilla
This job fetches Bugzilla bugs from the Web Compatibility > Knowledge Base component,
as well as their core bug dependencies and breakage reports, and stores them in BQ.
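For reference, the data comes straight from the Bugzilla REST API; a minimal sketch of the query that `webcompat_kb/main.py` issues (with the field list shortened) looks roughly like this:
```python
# Minimal sketch of the Bugzilla REST query the job issues (field list shortened).
import requests

BUGZILLA_API = "https://bugzilla.mozilla.org/rest"

response = requests.get(
    f"{BUGZILLA_API}/bug",
    params={
        "product": "Web Compatibility",
        "component": "Knowledge Base",
        "include_fields": "id,summary,status,resolution,depends_on,blocks,see_also,cf_user_story",
    },
)
response.raise_for_status()
kb_bugs = response.json()["bugs"]
```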
## Usage
This script is intended to be run in a docker container.
Build the docker image with:
```sh
docker build -t webcompat-kb .
```
To run locally, first install dependencies in `jobs/webcompat-kb`:
```sh
pip install -r requirements.txt
```
Then authenticate with gcloud and run the script:
```sh
gcloud auth application-default login
python3 webcompat_kb/main.py --bq_project_id=<your_project_id> --bq_dataset_id=<your_dataset_id>
```
## Development
Run tests with:
```sh
pytest
```
`flake8` and `black` are included for code linting and formatting:
```sh
pytest --black --flake8
```
or
```sh
flake8 webcompat_kb/ tests/
black --diff webcompat_kb/ tests/
```


@@ -0,0 +1,15 @@
build-job-webcompat-kb:
docker:
- image: << pipeline.parameters.git-image >>
steps:
- checkout
- compare-branch:
pattern: ^jobs/webcompat-kb/
- setup_remote_docker:
version: << pipeline.parameters.docker-version >>
- run:
name: Build Docker image
command: docker build -t app:build jobs/webcompat-kb/
- run:
name: Test Code
command: docker run app:build pytest --flake8 --black


@@ -0,0 +1,13 @@
job-webcompat-kb:
jobs:
- build-job-webcompat-kb
- gcp-gcr/build-and-push-image:
context: data-eng-airflow-gcr
docker-context: jobs/webcompat-kb/
path: jobs/webcompat-kb/
image: webcompat-kb_docker_etl
requires:
- build-job-webcompat-kb
filters:
branches:
only: main


@@ -0,0 +1,3 @@
[pytest]
testpaths =
tests


@@ -0,0 +1,6 @@
click==8.0.4
flake8==3.8.4
google-cloud-bigquery==3.11.4
pytest==6.0.2
pytest-black==0.3.11
pytest-flake8==1.0.6


@@ -0,0 +1,15 @@
#!/usr/bin/env python
from setuptools import setup, find_packages
readme = open("README.md").read()
setup(
name="webcompat-kb",
version="0.1.0",
author="Mozilla Corporation",
packages=find_packages(include=["webcompat_kb"]),
long_description=readme,
include_package_data=True,
license="MPL 2.0",
)



@@ -0,0 +1,390 @@
from webcompat_kb.main import process_fields
from webcompat_kb.main import extract_int_from_field
from webcompat_kb.main import build_relations, merge_relations
from webcompat_kb.main import add_links
from webcompat_kb.main import RELATION_CONFIG, LINK_FIELDS
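# Sample Bugzilla payloads (knowledge base, core and breakage bugs) used as fixtures below.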
SAMPLE_BUGS = [
{
"see_also": [
"https://github.com/webcompat/web-bugs/issues/13503",
"https://github.com/webcompat/web-bugs/issues/91682",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1633399",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1735227",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1739489",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1739791",
"https://github.com/webcompat/web-bugs/issues/109064",
"https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js", # noqa
"https://github.com/webcompat/web-bugs/issues/112848",
"https://github.com/webcompat/web-bugs/issues/117039",
],
"cf_user_story": "url:cmcreg.bancosantander.es/*\r\nurl:new.reddit.com/*\r\nurl:web.whatsapp.com/*\r\nurl:facebook.com/*\r\nurl:twitter.com/*\r\nurl:reddit.com/*\r\nurl:mobilevikings.be/*\r\nurl:book.ersthelfer.tv/*", # noqa
"severity": "--",
"priority": "--",
"depends_on": [903746],
"component": "Knowledge Base",
"product": "Web Compatibility",
"resolution": "",
"status": "NEW",
"blocks": [],
"id": 1835339,
"summary": "Missing implementation of textinput event",
},
{
"component": "Knowledge Base",
"product": "Web Compatibility",
"depends_on": [],
"see_also": [
"https://github.com/webcompat/web-bugs/issues/100260",
"https://github.com/webcompat/web-bugs/issues/22829",
"https://github.com/webcompat/web-bugs/issues/62926",
"https://github.com/webcompat/web-bugs/issues/66768",
"https://github.com/webcompat/web-bugs/issues/112423",
"https://mozilla.github.io/standards-positions/#webusb",
"https://github.com/webcompat/web-bugs/issues/122436",
"https://github.com/webcompat/web-bugs/issues/122127",
"https://github.com/webcompat/web-bugs/issues/120886",
],
"summary": "Sites breaking due to the lack of WebUSB support",
"id": 1835416,
"blocks": [],
"resolution": "",
"priority": "--",
"severity": "--",
"cf_user_story": "url:webminidisc.com/*\r\nurl:app.webadb.com/*\r\nurl:www.numworks.com/*\r\nurl:webadb.github.io/*\r\nurl:www.stemplayer.com/*\r\nurl:wootility.io/*\r\nurl:python.microbit.org/*\r\nurl:flash.android.com/*", # noqa
"status": "NEW",
},
{
"component": "Knowledge Base",
"product": "Web Compatibility",
"depends_on": [555555],
"see_also": [
"https://crbug.com/606208",
"https://github.com/whatwg/html/issues/1896",
"https://w3c.github.io/trusted-types/dist/spec/",
"https://github.com/webcompat/web-bugs/issues/124877",
"https://github.com/mozilla/standards-positions/issues/20",
"https://github.com/WebKit/standards-positions/issues/186",
],
"summary": "Test bug",
"id": 111111,
"blocks": [222222, 1734557],
"resolution": "",
"priority": "--",
"severity": "--",
"cf_user_story": "",
"status": "NEW",
},
]
SAMPLE_CORE_BUGS = [
{
"id": 903746,
"severity": "--",
"priority": "--",
"cf_user_story": "",
"depends_on": [],
"status": "UNCONFIRMED",
"product": "Core",
"blocks": [1754236, 1835339],
"component": "DOM: Events",
"see_also": [
"https://bugzilla.mozilla.org/show_bug.cgi?id=1739489",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1739791",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1735227",
"https://bugzilla.mozilla.org/show_bug.cgi?id=1633399",
"https://github.com/webcompat/web-bugs/issues/109064",
"https://github.com/webcompat/web-bugs/issues/112848",
"https://github.com/webcompat/web-bugs/issues/117039",
"https://github.com/w3c/uievents/issues/353",
],
"resolution": "",
"summary": "Missing textinput event",
},
{
"id": 555555,
"severity": "--",
"priority": "--",
"cf_user_story": "",
"depends_on": [],
"status": "UNCONFIRMED",
"product": "Core",
"blocks": [],
"component": "Test",
"see_also": ["https://mozilla.github.io/standards-positions/#testposition"],
"resolution": "",
"summary": "Test Core bug",
},
]
SAMPLE_BREAKAGE_BUGS = [
{
"id": 1734557,
"product": "Web Compatibility",
"cf_user_story": "url:angusnicneven.com/*",
"blocks": [],
"status": "ASSIGNED",
"summary": "Javascript causes infinite scroll because event.path is undefined",
"resolution": "",
"depends_on": [111111],
"see_also": [],
"component": "Desktop",
"severity": "--",
"priority": "--",
},
{
"id": 222222,
"product": "Web Compatibility",
"cf_user_story": "url:example.com/*",
"blocks": [],
"status": "ASSIGNED",
"summary": "Test breakage bug",
"resolution": "",
"depends_on": [111111],
"see_also": [],
"component": "Desktop",
"severity": "--",
"priority": "--",
},
]
class TestMain:
def test_extract_int_from_field(self):
field = extract_int_from_field("P3")
assert field == 3
field = extract_int_from_field("critical")
assert field == 1
field = extract_int_from_field("--")
assert field is None
field = extract_int_from_field("N/A")
assert field is None
field = extract_int_from_field("")
assert field is None
field = extract_int_from_field(None)
assert field is None
def test_process_fields_with_no_bugs(self):
result = process_fields([], RELATION_CONFIG)
expected = ({}, {})
assert result == expected
def test_process_fields(self):
bugs, ids = process_fields(SAMPLE_BUGS, RELATION_CONFIG)
expected_processed_bugs = {
1835339: {
"core_bugs": [903746],
"breakage_reports": [],
"interventions": [
"https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js" # noqa
],
"other_browser_issues": [],
"standards_issues": [],
"standards_positions": [],
"url_patterns": [
"cmcreg.bancosantander.es/*",
"new.reddit.com/*",
"web.whatsapp.com/*",
"facebook.com/*",
"twitter.com/*",
"reddit.com/*",
"mobilevikings.be/*",
"book.ersthelfer.tv/*",
],
},
1835416: {
"core_bugs": [],
"breakage_reports": [],
"interventions": [],
"other_browser_issues": [],
"standards_issues": [],
"standards_positions": [
"https://mozilla.github.io/standards-positions/#webusb"
],
"url_patterns": [
"webminidisc.com/*",
"app.webadb.com/*",
"www.numworks.com/*",
"webadb.github.io/*",
"www.stemplayer.com/*",
"wootility.io/*",
"python.microbit.org/*",
"flash.android.com/*",
],
},
111111: {
"core_bugs": [555555],
"breakage_reports": [222222, 1734557],
"interventions": [],
"other_browser_issues": ["https://crbug.com/606208"],
"standards_issues": ["https://github.com/whatwg/html/issues/1896"],
"standards_positions": [
"https://github.com/mozilla/standards-positions/issues/20",
"https://github.com/WebKit/standards-positions/issues/186",
],
"url_patterns": [],
},
}
expected_bug_ids = {
"core": [903746, 555555],
"breakage": [222222, 1734557],
}
assert bugs == expected_processed_bugs
assert ids == expected_bug_ids
def test_process_fields_config_variation(self):
bugs, ids = process_fields(
SAMPLE_BREAKAGE_BUGS, {"url_patterns": RELATION_CONFIG["url_patterns"]}
)
expected = {
1734557: {"url_patterns": ["angusnicneven.com/*"]},
222222: {"url_patterns": ["example.com/*"]},
}
assert bugs == expected
assert ids == {}
def test_relations(self):
bugs, _ = process_fields(SAMPLE_BUGS, RELATION_CONFIG)
relations = build_relations(bugs, RELATION_CONFIG)
assert relations["core_bugs"] == [
{"knowledge_base_bug": 1835339, "core_bug": 903746},
{"knowledge_base_bug": 111111, "core_bug": 555555},
]
assert relations["breakage_reports"] == [
{"knowledge_base_bug": 111111, "breakage_bug": 222222},
{"knowledge_base_bug": 111111, "breakage_bug": 1734557},
]
assert relations["interventions"] == [
{
"knowledge_base_bug": 1835339,
"code_url": "https://github.com/mozilla-extensions/webcompat-addon/blob/5b391018e847a1eb30eba4784c86acd1c638ed26/src/injections/js/bug1739489-draftjs-beforeinput.js", # noqa
}
]
assert relations["other_browser_issues"] == [
{"knowledge_base_bug": 111111, "issue_url": "https://crbug.com/606208"}
]
assert relations["standards_issues"] == [
{
"knowledge_base_bug": 111111,
"issue_url": "https://github.com/whatwg/html/issues/1896",
}
]
assert relations["standards_positions"] == [
{
"knowledge_base_bug": 1835416,
"discussion_url": "https://mozilla.github.io/standards-positions/#webusb", # noqa
},
{
"knowledge_base_bug": 111111,
"discussion_url": "https://github.com/mozilla/standards-positions/issues/20", # noqa
},
{
"knowledge_base_bug": 111111,
"discussion_url": "https://github.com/WebKit/standards-positions/issues/186", # noqa
},
]
assert relations["url_patterns"] == [
{
"bug": 1835339,
"url_pattern": "cmcreg.bancosantander.es/*",
},
{"bug": 1835339, "url_pattern": "new.reddit.com/*"},
{"bug": 1835339, "url_pattern": "web.whatsapp.com/*"},
{"bug": 1835339, "url_pattern": "facebook.com/*"},
{"bug": 1835339, "url_pattern": "twitter.com/*"},
{"bug": 1835339, "url_pattern": "reddit.com/*"},
{"bug": 1835339, "url_pattern": "mobilevikings.be/*"},
{"bug": 1835339, "url_pattern": "book.ersthelfer.tv/*"},
{"bug": 1835416, "url_pattern": "webminidisc.com/*"},
{"bug": 1835416, "url_pattern": "app.webadb.com/*"},
{"bug": 1835416, "url_pattern": "www.numworks.com/*"},
{"bug": 1835416, "url_pattern": "webadb.github.io/*"},
{"bug": 1835416, "url_pattern": "www.stemplayer.com/*"},
{"bug": 1835416, "url_pattern": "wootility.io/*"},
{"bug": 1835416, "url_pattern": "python.microbit.org/*"},
{"bug": 1835416, "url_pattern": "flash.android.com/*"},
]
def test_relations_config_variation(self):
url_only_config = {"url_patterns": RELATION_CONFIG["url_patterns"]}
bugs, _ = process_fields(SAMPLE_BREAKAGE_BUGS, url_only_config)
relations = build_relations(bugs, url_only_config)
assert len(relations) == 1
assert "url_patterns" in relations
assert relations["url_patterns"] == [
{"bug": 1734557, "url_pattern": "angusnicneven.com/*"},
{"bug": 222222, "url_pattern": "example.com/*"},
]
def test_add_links(self):
bugs, _ = process_fields(SAMPLE_BUGS, RELATION_CONFIG)
core_bugs, _ = process_fields(
SAMPLE_CORE_BUGS, {key: RELATION_CONFIG[key] for key in LINK_FIELDS}
)
result = add_links(bugs, core_bugs)
assert result[1835339]["standards_issues"] == [
"https://github.com/w3c/uievents/issues/353"
]
assert result[111111]["standards_positions"] == [
"https://github.com/mozilla/standards-positions/issues/20",
"https://github.com/WebKit/standards-positions/issues/186",
"https://mozilla.github.io/standards-positions/#testposition",
]
def test_add_links_no_core(self):
bugs, _ = process_fields(SAMPLE_BUGS, RELATION_CONFIG)
core_bugs, _ = process_fields(SAMPLE_CORE_BUGS, RELATION_CONFIG)
result = add_links(bugs, {})
assert result[1835339]["standards_issues"] == []
assert result[111111]["standards_positions"] == [
"https://github.com/mozilla/standards-positions/issues/20",
"https://github.com/WebKit/standards-positions/issues/186",
]
def test_merge_relations(self):
bugs, _ = process_fields(SAMPLE_BUGS, RELATION_CONFIG)
relations = build_relations(bugs, RELATION_CONFIG)
url_only_config = {"url_patterns": RELATION_CONFIG["url_patterns"]}
breakage_bugs, _ = process_fields(SAMPLE_BREAKAGE_BUGS, url_only_config)
breakage_relations = build_relations(breakage_bugs, url_only_config)
merged = merge_relations(relations, breakage_relations)
assert merged["url_patterns"] == [
{"bug": 1835339, "url_pattern": "cmcreg.bancosantander.es/*"},
{"bug": 1835339, "url_pattern": "new.reddit.com/*"},
{"bug": 1835339, "url_pattern": "web.whatsapp.com/*"},
{"bug": 1835339, "url_pattern": "facebook.com/*"},
{"bug": 1835339, "url_pattern": "twitter.com/*"},
{"bug": 1835339, "url_pattern": "reddit.com/*"},
{"bug": 1835339, "url_pattern": "mobilevikings.be/*"},
{"bug": 1835339, "url_pattern": "book.ersthelfer.tv/*"},
{"bug": 1835416, "url_pattern": "webminidisc.com/*"},
{"bug": 1835416, "url_pattern": "app.webadb.com/*"},
{"bug": 1835416, "url_pattern": "www.numworks.com/*"},
{"bug": 1835416, "url_pattern": "webadb.github.io/*"},
{"bug": 1835416, "url_pattern": "www.stemplayer.com/*"},
{"bug": 1835416, "url_pattern": "wootility.io/*"},
{"bug": 1835416, "url_pattern": "python.microbit.org/*"},
{"bug": 1835416, "url_pattern": "flash.android.com/*"},
{"bug": 1734557, "url_pattern": "angusnicneven.com/*"},
{"bug": 222222, "url_pattern": "example.com/*"},
]


@@ -0,0 +1,360 @@
import click
import logging
import requests
import re
from google.cloud import bigquery
BUGZILLA_API = "https://bugzilla.mozilla.org/rest"
OTHER_BROWSER = ["bugs.chromium.org", "bugs.webkit.org", "crbug.com"]
STANDARDS_ISSUES = ["github.com/w3c", "github.com/whatwg", "github.com/wicg"]
STANDARDS_POSITIONS = ["standards-positions"]
INTERVENTIONS = ["github.com/mozilla-extensions/webcompat-addon"]
FIELD_MAP = {
"blocker": 1,
"critical": 1,
"major": 2,
"normal": 3,
"minor": 4,
"trivial": 4,
"enhancement": 4,
"n/a": None,
"--": None,
}
def get_urls_from_story(user_story_str):
pattern = r"url:(\S*)"
return re.findall(pattern, user_story_str)
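# Maps each relation table to its BigQuery schema ("fields"), the bug field it is
# derived from ("source"), and optional URL filters, id stores and custom extractors.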
RELATION_CONFIG = {
"core_bugs": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "core_bug", "type": "INTEGER", "mode": "REQUIRED"},
],
"source": "depends_on",
"store_id": "core",
},
"breakage_reports": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "breakage_bug", "type": "INTEGER", "mode": "REQUIRED"},
],
"source": "blocks",
"store_id": "breakage",
},
"interventions": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "code_url", "type": "STRING", "mode": "REQUIRED"},
],
"source": "see_also",
"condition": INTERVENTIONS,
},
"other_browser_issues": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "issue_url", "type": "STRING", "mode": "REQUIRED"},
],
"source": "see_also",
"condition": OTHER_BROWSER,
},
"standards_issues": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "issue_url", "type": "STRING", "mode": "REQUIRED"},
],
"source": "see_also",
"condition": STANDARDS_ISSUES,
},
"standards_positions": {
"fields": [
{"name": "knowledge_base_bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "discussion_url", "type": "STRING", "mode": "REQUIRED"},
],
"source": "see_also",
"condition": STANDARDS_POSITIONS,
},
"url_patterns": {
"fields": [
{"name": "bug", "type": "INTEGER", "mode": "REQUIRED"},
{"name": "url_pattern", "type": "STRING", "mode": "REQUIRED"},
],
"source": "cf_user_story",
"custom_func": get_urls_from_story,
},
}
LINK_FIELDS = ["other_browser_issues", "standards_issues", "standards_positions"]
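# Fetch bugs from the Bugzilla REST API, restricted to the fields the job needs.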
def fetch_bugs(params=None):
if params is None:
params = {}
fields = [
"id",
"summary",
"status",
"resolution",
"product",
"component",
"see_also",
"depends_on",
"blocks",
"priority",
"severity",
"cf_user_story",
]
url = f"{BUGZILLA_API}/bug"
params["include_fields"] = ",".join(fields)
response = requests.get(url, params=params)
response.raise_for_status()
result = response.json()
return result["bugs"]
def process_individual_bug(bug, relation_config, processed_bugs, bug_ids):
bug_id = bug["id"]
if bug_id not in processed_bugs:
processed_bugs[bug_id] = {rel: [] for rel in relation_config.keys()}
for rel, config in relation_config.items():
if "custom_func" in config:
source_data = config["custom_func"](bug[config["source"]])
else:
source_data = bug[config["source"]]
for data in source_data:
if "condition" in config and not any(
c in data for c in config["condition"]
):
continue
processed_bugs[bug_id][rel].append(data)
if config.get("store_id"):
if config["store_id"] not in bug_ids:
bug_ids[config["store_id"]] = []
bug_ids[config["store_id"]].append(data)
return processed_bugs, bug_ids
def process_fields(bug_list, relation_config):
processed_bugs = {}
bug_ids = {}
for bug in bug_list:
processed_bugs, bug_ids = process_individual_bug(
bug, relation_config, processed_bugs, bug_ids
)
return processed_bugs, bug_ids
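# Copy link-type fields (standards issues/positions, other-browser issues) from
# dependent core bugs onto the knowledge base bugs that reference them, skipping duplicates.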
def add_links(kb_processed, dep_processed):
result = {**kb_processed}
for key in result:
for bug_id in result[key]["core_bugs"]:
for sub_key in LINK_FIELDS:
if sub_key in result[key] and sub_key in dep_processed.get(bug_id, {}):
result[key][sub_key].extend(
x
for x in dep_processed[bug_id][sub_key]
if x not in result[key][sub_key]
)
return result
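# Flatten the processed bugs into one list of rows per relation, matching the
# schemas declared in RELATION_CONFIG.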
def build_relations(bugs, relation_config):
relations = {key: [] for key in relation_config.keys()}
for bug_id, data in bugs.items():
for field_key, items in data.items():
fields = relation_config[field_key]["fields"]
for row in items:
relation_row = {fields[0]["name"]: bug_id, fields[1]["name"]: row}
relations[field_key].append(relation_row)
return relations
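# Merge two relation dicts, concatenating the row lists for keys present in both.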
def merge_relations(main_dict, additional_dict):
for key, value in additional_dict.items():
if key in main_dict:
main_dict[key].extend(value)
else:
main_dict[key] = value
return main_dict
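# Split the fetched dependency bugs into core bugs and breakage reports using the
# ids recorded while processing the knowledge base bugs.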
def split_bugs(dep_bugs, bug_ids):
core_bugs, breakage_bugs = [], []
for bug in dep_bugs:
if bug["id"] in bug_ids.get("core", []):
core_bugs.append(bug)
elif bug["id"] in bug_ids.get("breakage", []):
breakage_bugs.append(bug)
return core_bugs, breakage_bugs
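# Convert severity/priority values such as "critical" or "P3" to integers, using
# FIELD_MAP first and falling back to the first number found in the string.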
def extract_int_from_field(field):
if field:
if field.lower() in FIELD_MAP:
return FIELD_MAP[field.lower()]
match = re.search(r"\d+", field)
if match:
return int(match.group())
return None
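# Load each non-empty relation into its BigQuery table, overwriting previous contents.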
def update_relations(relations, client, bq_dataset_id):
for key, value in relations.items():
if value:
job_config = bigquery.LoadJobConfig(
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
schema=[
bigquery.SchemaField(item["name"], item["type"], mode=item["mode"])
for item in RELATION_CONFIG[key]["fields"]
],
write_disposition="WRITE_TRUNCATE",
)
relation_table = f"{bq_dataset_id}.{key}"
job = client.load_table_from_json(
value, relation_table, job_config=job_config
)
logging.info(f"Writing to `{relation_table}` table")
try:
job.result()
except Exception as e:
print(f"ERROR: {e}")
if job.errors:
for error in job.errors:
logging.error(error)
table = client.get_table(relation_table)
logging.info(f"Loaded {table.num_rows} rows into {table}")
def update_bugs(bugs, client, bq_dataset_id):
res = []
for bug in bugs:
bq_bug = {
"number": bug["id"],
"title": bug["summary"],
"status": bug["status"],
"resolution": bug["resolution"],
"product": bug["product"],
"component": bug["component"],
"severity": extract_int_from_field(bug["severity"]),
"priority": extract_int_from_field(bug["priority"]),
}
res.append(bq_bug)
job_config = bigquery.LoadJobConfig(
source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON,
schema=[
bigquery.SchemaField("number", "INTEGER", mode="REQUIRED"),
bigquery.SchemaField("title", "STRING", mode="REQUIRED"),
bigquery.SchemaField("status", "STRING", mode="REQUIRED"),
bigquery.SchemaField("resolution", "STRING", mode="REQUIRED"),
bigquery.SchemaField("product", "STRING", mode="REQUIRED"),
bigquery.SchemaField("component", "STRING", mode="REQUIRED"),
bigquery.SchemaField("severity", "INTEGER"),
bigquery.SchemaField("priority", "INTEGER"),
],
write_disposition="WRITE_TRUNCATE",
)
bugs_table = f"{bq_dataset_id}.bugzilla_bugs"
job = client.load_table_from_json(
res,
bugs_table,
job_config=job_config,
)
logging.info("Writing to `bugzilla_bugs` table")
try:
job.result()
except Exception as e:
print(f"ERROR: {e}")
if job.errors:
for error in job.errors:
logging.error(error)
table = client.get_table(bugs_table)
logging.info(f"Loaded {table.num_rows} rows into {table}")
@click.command()
@click.option("--bq_project_id", help="BigQuery project id", required=True)
@click.option("--bq_dataset_id", help="BigQuery dataset id", required=True)
def main(bq_project_id, bq_dataset_id):
client = bigquery.Client(project=bq_project_id)
logging.info("Fetching KB bugs from bugzilla")
bug_filters = {
"product": "Web Compatibility",
"component": "Knowledge Base",
}
kb_bugs = fetch_bugs(bug_filters)
# Process KB bugs fields and get their dependant core/breakage bugs ids.
kb_data, kb_ids = process_fields(kb_bugs, RELATION_CONFIG)
dep_ids = set(item for sublist in kb_ids.values() for item in sublist)
if not dep_ids:
return
logging.info("Fetching core bugs and breakage reports from bugzilla")
dep_bugs = fetch_bugs({"id": ",".join(map(str, dep_ids))})
kb_bugs.extend(dep_bugs)
# Separate bugs into core and breakage.
core_bugs, breakage_bugs = split_bugs(dep_bugs, kb_ids)
# Process core bugs and update KB data with missing links from core bugs.
if core_bugs:
core_config = {key: RELATION_CONFIG[key] for key in LINK_FIELDS}
core_data, _ = process_fields(core_bugs, core_config)
kb_data = add_links(kb_data, core_data)
# Build relations for BQ.
rels = build_relations(kb_data, RELATION_CONFIG)
# Process breakage bugs for url patterns and merge them to the existing relations.
if breakage_bugs:
break_config = {"url_patterns": RELATION_CONFIG["url_patterns"]}
break_data, _ = process_fields(breakage_bugs, break_config)
add = build_relations(break_data, break_config)
rels = merge_relations(rels, add)
update_bugs(kb_bugs, client, bq_dataset_id)
update_relations(rels, client, bq_dataset_id)
if __name__ == "__main__":
logging.getLogger().setLevel(logging.INFO)
main()