Metric-hub integration (#3696)
* Metric-hub integration * Add metrics.data_source()
This commit is contained in:
Родитель
3e6ecebc4c
Коммит
10cbb52126
|
@ -0,0 +1,51 @@
|
|||
"""Metric-hub integration."""
|
||||
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
import attr
|
||||
from metric_config_parser.config import ConfigCollection
|
||||
|
||||
|
||||
@attr.s(auto_attribs=True, slots=True)
class MetricHub:
    """Metric-hub integration for generating SQL from referenced metrics.

    The backing ``ConfigCollection`` is created lazily: it is only built the
    first time ``config_collection`` is accessed and is reused afterwards.
    """

    # Cached collection; ``None`` until first use unless one is supplied
    # when the instance is constructed.
    _config_collection: Optional[ConfigCollection] = None

    @property
    def config_collection(self):
        """Return the config collection, creating it on first access.

        When no collection has been set yet, one is obtained via
        ``ConfigCollection.from_github_repo()`` and stored on the instance.
        """
        # Explicit ``is None`` so a collection that was provided is never
        # replaced just because it happens to evaluate as falsy.
        if self._config_collection is None:
            self._config_collection = ConfigCollection.from_github_repo()
        return self._config_collection

    def calculate(
        self,
        metrics: List[str],
        platform: str,
        group_by: Optional[Union[List[str], Dict[str, str]]] = None,
        where: Optional[str] = None,
        group_by_client_id: bool = True,
        group_by_submission_date: bool = True,
    ) -> str:
        """Generate SQL query for specified metrics.

        All arguments are forwarded to ``ConfigCollection.get_metrics_sql``.

        :param metrics: names of the metrics to include.
        :param platform: platform the metrics are looked up for.
        :param group_by: grouping fields, as a list or a dict; omitted or
            ``None`` is forwarded as an empty list.
        :param where: optional filter condition.
        :param group_by_client_id: forwarded unchanged.
        :param group_by_submission_date: forwarded unchanged.
        :return: the generated SQL.
        """
        # A fresh list per call: a ``[]`` default in the signature would be a
        # single shared object that downstream code could mutate.
        grouping = [] if group_by is None else group_by
        return self.config_collection.get_metrics_sql(
            metrics=metrics,
            platform=platform,
            group_by=grouping,
            where=where,
            group_by_client_id=group_by_client_id,
            group_by_submission_date=group_by_submission_date,
        )

    def data_source(
        self,
        data_source: str,
        platform: str,
        where: Optional[str] = None,
    ) -> str:
        """Generate SQL query for specified data source.

        All arguments are forwarded to
        ``ConfigCollection.get_data_source_sql``.

        :param data_source: name of the data source.
        :param platform: platform the data source is looked up for.
        :param where: optional filter condition.
        :return: the generated SQL.
        """
        return self.config_collection.get_data_source_sql(
            data_source=data_source, platform=platform, where=where
        )
|
|
@ -13,6 +13,7 @@ from google.cloud import bigquery
|
|||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
from bigquery_etl.format_sql.formatter import reformat
|
||||
from bigquery_etl.metrics import MetricHub
|
||||
|
||||
# Search for all camelCase situations in reverse with arbitrary lookaheads.
|
||||
REV_WORD_BOUND_PAT = re.compile(
|
||||
|
@ -63,7 +64,10 @@ def render(
|
|||
file_loader = FileSystemLoader(f"{template_folder}")
|
||||
env = Environment(loader=file_loader)
|
||||
main_sql = env.get_template(sql_filename)
|
||||
rendered = main_sql.render(**kwargs)
|
||||
if "metrics" not in kwargs:
|
||||
rendered = main_sql.render(**kwargs, metrics=MetricHub())
|
||||
else:
|
||||
rendered = main_sql.render(**kwargs)
|
||||
if format:
|
||||
rendered = reformat(rendered)
|
||||
return rendered
|
||||
|
|
|
@ -16,6 +16,7 @@ mdx_truly_sane_lists==1.3
|
|||
mkdocs==1.4.2
|
||||
mkdocs-material==8.5.7
|
||||
mkdocs-awesome-pages-plugin==2.8.0
|
||||
mozilla-metric-config-parser==2023.4.2
|
||||
mozilla-schema-generator==0.5.1
|
||||
pandas==1.5.1
|
||||
pathos==0.2.9
|
||||
|
|
|
@ -109,6 +109,7 @@ attrs==22.1.0 \
|
|||
# aiohttp
|
||||
# cattrs
|
||||
# jsonschema
|
||||
# mozilla-metric-config-parser
|
||||
# pytest
|
||||
# pytest-mypy
|
||||
authlib==1.2.0 \
|
||||
|
@ -159,7 +160,9 @@ cachetools==5.3.0 \
|
|||
cattrs==22.2.0 \
|
||||
--hash=sha256:bc12b1f0d000b9f9bee83335887d532a1d3e99a833d1bf0882151c97d3e68c21 \
|
||||
--hash=sha256:f0eed5642399423cf656e7b66ce92cdc5b963ecafd041d1b24d136fdde7acf6d
|
||||
# via -r requirements.in
|
||||
# via
|
||||
# -r requirements.in
|
||||
# mozilla-metric-config-parser
|
||||
certifi==2022.12.7 \
|
||||
--hash=sha256:35824b4c3a97115964b408844d64aa14db1cc518f6562e8d7261699d1350a9e3 \
|
||||
--hash=sha256:4ad3232f5e926d6718ec31cfc1fcadfde020920e278684144551c91769c7bc18
|
||||
|
@ -247,6 +250,7 @@ click==8.1.3 \
|
|||
# -r requirements.in
|
||||
# black
|
||||
# mkdocs
|
||||
# mozilla-metric-config-parser
|
||||
# mozilla-schema-generator
|
||||
# pip-tools
|
||||
cryptography==39.0.1 \
|
||||
|
@ -412,6 +416,7 @@ gitpython==3.1.30 \
|
|||
--hash=sha256:cd455b0000615c60e286208ba540271af9fe531fa6a87cc590a7298785ab2882
|
||||
# via
|
||||
# -r requirements.in
|
||||
# mozilla-metric-config-parser
|
||||
# mozilla-schema-generator
|
||||
google-api-core[grpc]==2.11.0 \
|
||||
--hash=sha256:4b9bb5d5a380a0befa0573b302651b8a9a89262c1730e37bf423cec511804c22 \
|
||||
|
@ -617,6 +622,7 @@ jinja2==3.1.2 \
|
|||
# -r requirements.in
|
||||
# mkdocs
|
||||
# mkdocs-material
|
||||
# mozilla-metric-config-parser
|
||||
jsonschema==4.17.0 \
|
||||
--hash=sha256:5bfcf2bca16a087ade17e02b282d34af7ccd749ef76241e7f9bd7c0cb8a9424d \
|
||||
--hash=sha256:f660066c3966db7d6daeaea8a75e0b68237a48e51cf49882087757bb59916248
|
||||
|
@ -718,6 +724,10 @@ mkdocs-material-extensions==1.1.1 \
|
|||
--hash=sha256:9c003da71e2cc2493d910237448c672e00cefc800d3d6ae93d2fc69979e3bd93 \
|
||||
--hash=sha256:e41d9f38e4798b6617ad98ca8f7f1157b1e4385ac1459ca1e4ea219b556df945
|
||||
# via mkdocs-material
|
||||
mozilla-metric-config-parser==2023.4.2 \
|
||||
--hash=sha256:183637ec38e5a92b0130eb695d078c02fa73e36480e5b8a6a207d78dc19ce4ab \
|
||||
--hash=sha256:674214af52949a64dd8af0f4fba8bcdc9eb3135a6bcdae91113c94956bc91f80
|
||||
# via -r requirements.in
|
||||
mozilla-schema-generator==0.5.1 \
|
||||
--hash=sha256:77109d64d0fd55b2579568e9a8f7c52d8eeed7e2a254b3262dd206ed21ffad38 \
|
||||
--hash=sha256:957dcb990d67436cfdabc8878cea1102040920f00cd5c5a5a4962344000bb26b
|
||||
|
@ -1187,7 +1197,9 @@ python-dateutil==2.8.2 \
|
|||
pytz==2022.7.1 \
|
||||
--hash=sha256:01a0681c4b9684a28304615eba55d1ab31ae00bf68ec157ec3708a8182dbbcd0 \
|
||||
--hash=sha256:78f4f37d8198e0627c5f1143240bb0206b8691d8d7ac6d78fee88b78733f8c4a
|
||||
# via pandas
|
||||
# via
|
||||
# mozilla-metric-config-parser
|
||||
# pandas
|
||||
pyyaml==6.0 \
|
||||
--hash=sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf \
|
||||
--hash=sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293 \
|
||||
|
@ -1249,6 +1261,7 @@ requests==2.28.2 \
|
|||
# google-cloud-bigquery
|
||||
# google-cloud-storage
|
||||
# mkdocs-material
|
||||
# mozilla-metric-config-parser
|
||||
# mozilla-schema-generator
|
||||
# requests-oauthlib
|
||||
# stripe
|
||||
|
@ -1295,6 +1308,7 @@ toml==0.10.2 \
|
|||
--hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \
|
||||
--hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f
|
||||
# via
|
||||
# mozilla-metric-config-parser
|
||||
# pre-commit
|
||||
# pytest-black
|
||||
tomli==2.0.1 \
|
||||
|
|
|
@ -174,3 +174,21 @@ class TestDryRun:
|
|||
).get_error()
|
||||
is None
|
||||
)
|
||||
|
||||
def test_dryrun_metrics_query(self, tmp_query_path):
    """Check that a query embedding ``metrics.calculate`` dry-runs as valid."""
    template = """
SELECT * FROM (
{{ metrics.calculate(
metrics=['days_of_use', 'uri_count', 'ad_clicks'],
platform='firefox_desktop',
group_by={'sample_id': 'sample_id'},
where='submission_date = "2023-01-01"'
) }}
)
"""
    sql_path = tmp_query_path / "query.sql"
    sql_path.write_text(template)

    assert DryRun(sqlfile=str(sql_path)).is_valid()
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
from bigquery_etl.util.common import project_dirs
|
||||
import pytest
|
||||
|
||||
from bigquery_etl.util.common import project_dirs, render
|
||||
|
||||
|
||||
class TestUtilCommon:
|
||||
|
@ -7,3 +9,79 @@ class TestUtilCommon:
|
|||
|
||||
existing_projects = project_dirs()
|
||||
assert "sql/moz-fx-data-shared-prod" in existing_projects
|
||||
|
||||
def test_metrics_render(self, tmp_path):
    """Check that ``render`` expands a single-metric ``metrics.calculate``."""
    template = r"""
SELECT * FROM (
{{ metrics.calculate(
metrics=['days_of_use'],
platform='firefox_desktop'
) }}
)
"""
    sql_path = tmp_path / "test_query.sql"
    sql_path.write_text(template)

    output = render(sql_path.name, template_folder=sql_path.parent)

    # The Jinja call must be gone and the metric must show up in the result.
    assert r"{{ metrics.calculate" not in output
    assert "days_of_use" in output
|
||||
|
||||
def test_non_existing_metrics_render(self, tmp_path):
    """Check that rendering a template with an unknown metric raises."""
    template = r"""
SELECT * FROM (
{{ metrics.calculate(
metrics=['not-existing'],
platform='firefox_desktop'
) }}
)
"""
    sql_path = tmp_path / "test_query.sql"
    sql_path.write_text(template)

    with pytest.raises(ValueError):
        render(sql_path.name, template_folder=sql_path.parent)
|
||||
|
||||
def test_render_multiple_metrics(self, tmp_path):
    """Check rendering of several metrics with ``group_by`` and ``where``."""
    template = r"""
SELECT * FROM (
{{ metrics.calculate(
metrics=['days_of_use', 'uri_count', 'ad_clicks'],
platform='firefox_desktop',
group_by={'sample_id': 'sample_id'},
where='submission_date = "2023-01-01"'
) }}
)
"""
    sql_path = tmp_path / "test_query.sql"
    sql_path.write_text(template)

    output = render(sql_path.name, template_folder=sql_path.parent)

    # No template syntax may survive rendering.
    for leftover in ("metrics.calculate", r"{{"):
        assert leftover not in output

    # Fragments the rendered SQL is expected to contain, checked in order.
    expected_fragments = (
        "days_of_use",
        "clients_daily",
        "uri_count",
        "ad_clicks",
        "mozdata.search.search_clients_engines_sources_daily",
        'submission_date = "2023-01-01"',
        "sample_id",
    )
    for fragment in expected_fragments:
        assert fragment in output
|
||||
|
||||
def test_render_data_source(self, tmp_path):
    """Check that ``render`` expands a ``metrics.data_source`` call."""
    template = r"""
SELECT * FROM (
{{ metrics.data_source(
data_source="main",
platform='firefox_desktop',
where='submission_date = "2023-01-01"'
) }}
)
"""
    sql_path = tmp_path / "test_query.sql"
    sql_path.write_text(template)

    output = render(sql_path.name, template_folder=sql_path.parent)

    # No template syntax may survive rendering.
    for leftover in ("metrics.data_source", r"{{"):
        assert leftover not in output

    # The data source name and the filter must appear in the result.
    for fragment in ("main", 'submission_date = "2023-01-01"'):
        assert fragment in output
|
||||
|
|
Загрузка…
Ссылка в новой задаче