335 строки
10 KiB
Python
335 строки
10 KiB
Python
"""Generate lookml from namespaces."""
|
|
|
|
import logging
|
|
from functools import partial
|
|
from multiprocessing.pool import Pool
|
|
from pathlib import Path
|
|
from typing import Any, Dict, Iterable, Optional
|
|
|
|
import click
|
|
import lkml
|
|
import yaml
|
|
|
|
from generator.utils import get_file_from_looker_hub
|
|
|
|
from .dashboards import DASHBOARD_TYPES
|
|
from .dryrun import DryRunContext, DryRunError, Errors, credentials, id_token
|
|
from .explores import EXPLORE_TYPES
|
|
from .metrics_utils import LOOKER_METRIC_HUB_REPO, METRIC_HUB_REPO, MetricsConfigLoader
|
|
from .namespaces import _get_glean_apps
|
|
from .views import VIEW_TYPES, View, ViewDict
|
|
from .views.datagroups import generate_datagroup
|
|
|
|
FILE_HEADER = """
|
|
# *Do not manually modify this file*
|
|
#
|
|
# This file has been generated via https://github.com/mozilla/lookml-generator
|
|
# You can extend this view in the looker-spoke-default project (https://github.com/mozilla/looker-spoke-default)
|
|
|
|
"""
|
|
|
|
|
|
def _generate_view(
|
|
out_dir: Path,
|
|
view: View,
|
|
v1_name: Optional[str],
|
|
dryrun,
|
|
) -> Optional[Path]:
|
|
logging.info(
|
|
f"Generating lookml for view {view.name} in {view.namespace} of type {view.view_type}"
|
|
)
|
|
path = out_dir / f"{view.name}.view.lkml"
|
|
|
|
try:
|
|
lookml = view.to_lookml(v1_name, dryrun)
|
|
if lookml == {}:
|
|
return None
|
|
|
|
# lkml.dump may return None, in which case write an empty file
|
|
path.write_text(FILE_HEADER + (lkml.dump(lookml) or ""))
|
|
return path
|
|
except DryRunError as e:
|
|
if e.error == Errors.PERMISSION_DENIED and e.use_cloud_function:
|
|
print(
|
|
f"Permission error dry running {view.name}. Copy existing {path} file from looker-hub."
|
|
)
|
|
try:
|
|
get_file_from_looker_hub(path)
|
|
return path
|
|
except Exception as ex:
|
|
print(f"Skip generating view for {path}: {ex}")
|
|
return None
|
|
else:
|
|
raise
|
|
|
|
|
|
def _generate_explore(
|
|
out_dir: Path,
|
|
namespace: str,
|
|
explore_name: str,
|
|
explore_info: Any,
|
|
views_dir: Path,
|
|
v1_name: Optional[
|
|
str
|
|
], # v1_name for Glean explores: see: https://mozilla.github.io/probe-scraper/#tag/library
|
|
) -> Path:
|
|
logging.info(f"Generating lookml for explore {explore_name} in {namespace}")
|
|
explore_by_type = EXPLORE_TYPES[explore_info["type"]].from_dict(
|
|
explore_name, explore_info, views_dir
|
|
)
|
|
file_lookml = {
|
|
# Looker validates all included files,
|
|
# so if we're not explicit about files here, validation takes
|
|
# forever as looker re-validates all views for every explore (if we used *).
|
|
"includes": [
|
|
f"/looker-hub/{namespace}/views/{view}.view.lkml"
|
|
for view in explore_by_type.get_dependent_views()
|
|
],
|
|
"explores": explore_by_type.to_lookml(v1_name),
|
|
}
|
|
path = out_dir / (explore_name + ".explore.lkml")
|
|
# lkml.dump may return None, in which case write an empty file
|
|
path.write_text(FILE_HEADER + (lkml.dump(file_lookml) or ""))
|
|
return path
|
|
|
|
|
|
def _generate_dashboard(
|
|
dash_dir: Path,
|
|
namespace: str,
|
|
dashboard_name: str,
|
|
dashboard_info: Any,
|
|
):
|
|
logging.info(f"Generating lookml for dashboard {dashboard_name} in {namespace}")
|
|
dashboard = DASHBOARD_TYPES[dashboard_info["type"]].from_dict(
|
|
namespace, dashboard_name, dashboard_info
|
|
)
|
|
|
|
dashboard_lookml = dashboard.to_lookml()
|
|
dash_path = dash_dir / f"{dashboard_name}.dashboard.lookml"
|
|
dash_path.write_text(FILE_HEADER + dashboard_lookml)
|
|
return dash_path
|
|
|
|
|
|
def _get_views_from_dict(views: Dict[str, ViewDict], namespace: str) -> Iterable[View]:
|
|
for view_name, view_info in views.items():
|
|
yield VIEW_TYPES[view_info["type"]].from_dict( # type: ignore
|
|
namespace, view_name, view_info
|
|
)
|
|
|
|
|
|
def _glean_apps_to_v1_map(glean_apps):
|
|
return {d["name"]: d["v1_name"] for d in glean_apps}
|
|
|
|
|
|
def _run_generation(func):
|
|
"""
|
|
Run the partially applied generate function.
|
|
|
|
For parallel execution.
|
|
"""
|
|
return func()
|
|
|
|
|
|
def _update_metric_repos(metric_hub_repos):
|
|
"""Update metric hub repos when initializing the processes."""
|
|
MetricsConfigLoader.update_repos(metric_hub_repos)
|
|
|
|
|
|
def _lookml(
|
|
namespaces,
|
|
glean_apps,
|
|
target_dir,
|
|
dryrun,
|
|
namespace_filter=[],
|
|
parallelism: int = 8,
|
|
metric_hub_repos=[],
|
|
):
|
|
namespaces_content = namespaces.read()
|
|
_namespaces = yaml.safe_load(namespaces_content)
|
|
target = Path(target_dir)
|
|
target.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Write namespaces file to target directory, for use
|
|
# by the Glean Dictionary and other tools
|
|
with open(target / "namespaces.yaml", "w") as target_namespaces_file:
|
|
target_namespaces_file.write(namespaces_content)
|
|
|
|
generate_views = []
|
|
generate_datagroups = []
|
|
generate_explores = []
|
|
generate_dashboards = []
|
|
v1_mapping = _glean_apps_to_v1_map(glean_apps)
|
|
|
|
for namespace, lookml_objects in _namespaces.items():
|
|
if len(namespace_filter) == 0 or namespace in namespace_filter:
|
|
view_dir = target / namespace / "views"
|
|
view_dir.mkdir(parents=True, exist_ok=True)
|
|
views = list(
|
|
_get_views_from_dict(lookml_objects.get("views", {}), namespace)
|
|
)
|
|
|
|
v1_name: Optional[str] = v1_mapping.get(namespace)
|
|
for view in views:
|
|
generate_views.append(
|
|
partial(
|
|
_generate_view,
|
|
view_dir,
|
|
view,
|
|
v1_name,
|
|
dryrun,
|
|
)
|
|
)
|
|
generate_datagroups.append(
|
|
partial(
|
|
generate_datagroup,
|
|
view,
|
|
target,
|
|
namespace,
|
|
dryrun,
|
|
)
|
|
)
|
|
|
|
explore_dir = target / namespace / "explores"
|
|
explore_dir.mkdir(parents=True, exist_ok=True)
|
|
explores = lookml_objects.get("explores", {})
|
|
generate_explores += [
|
|
partial(
|
|
_generate_explore,
|
|
explore_dir,
|
|
namespace,
|
|
explore_name,
|
|
explore,
|
|
view_dir,
|
|
v1_name,
|
|
)
|
|
for explore_name, explore in explores.items()
|
|
]
|
|
|
|
dashboard_dir = target / namespace / "dashboards"
|
|
dashboard_dir.mkdir(parents=True, exist_ok=True)
|
|
dashboards = lookml_objects.get("dashboards", {})
|
|
generate_dashboards += [
|
|
partial(
|
|
_generate_dashboard,
|
|
dashboard_dir,
|
|
namespace,
|
|
dashboard_name,
|
|
dashboard,
|
|
)
|
|
for dashboard_name, dashboard in dashboards.items()
|
|
]
|
|
|
|
if parallelism == 1:
|
|
# run without using multiprocessing
|
|
# this is needed for the unit tests to work as mocks are not shared across processes
|
|
logging.info(" Generating views")
|
|
for generate_view_func in generate_views:
|
|
generate_view_func()
|
|
logging.info(" Generating datagroups")
|
|
for generate_datagroup_func in generate_datagroups:
|
|
generate_datagroup_func()
|
|
logging.info(" Generating explores")
|
|
for generate_explore_func in generate_explores:
|
|
generate_explore_func()
|
|
logging.info(" Generating dashboards")
|
|
for generate_dashboard_func in generate_dashboards:
|
|
generate_dashboard_func()
|
|
else:
|
|
with Pool(
|
|
parallelism, initializer=partial(_update_metric_repos, metric_hub_repos)
|
|
) as pool:
|
|
logging.info(" Generating views and datagroups")
|
|
pool.map(_run_generation, generate_views + generate_datagroups)
|
|
logging.info(" Generating explores")
|
|
pool.map(
|
|
_run_generation,
|
|
generate_explores,
|
|
)
|
|
logging.info(" Generating dashboards")
|
|
pool.map(
|
|
_run_generation,
|
|
generate_dashboards,
|
|
)
|
|
|
|
|
|
@click.command(help=__doc__)
|
|
@click.option(
|
|
"--namespaces",
|
|
default="namespaces.yaml",
|
|
type=click.File(),
|
|
help="Path to a yaml namespaces file",
|
|
)
|
|
@click.option(
|
|
"--app-listings-uri",
|
|
default="https://probeinfo.telemetry.mozilla.org/v2/glean/app-listings",
|
|
help="URI for probeinfo service v2 glean app listings",
|
|
)
|
|
@click.option(
|
|
"--target-dir",
|
|
default="looker-hub/",
|
|
type=click.Path(),
|
|
help="Path to a directory where lookml will be written",
|
|
)
|
|
@click.option(
|
|
"--metric-hub-repos",
|
|
"--metric-hub-repos",
|
|
multiple=True,
|
|
default=[METRIC_HUB_REPO, LOOKER_METRIC_HUB_REPO],
|
|
help="Repos to load metric configs from.",
|
|
)
|
|
@click.option(
|
|
"--only",
|
|
multiple=True,
|
|
default=[],
|
|
help="List of namespace names to generate lookml for.",
|
|
)
|
|
@click.option(
|
|
"--use_cloud_function",
|
|
"--use-cloud-function",
|
|
help="Use the Cloud Function to run dry runs during LookML generation.",
|
|
type=bool,
|
|
)
|
|
@click.option(
|
|
"--parallelism",
|
|
"-p",
|
|
default=8,
|
|
type=int,
|
|
help="Number of processes to use for LookML generation",
|
|
)
|
|
def lookml(
|
|
namespaces,
|
|
app_listings_uri,
|
|
target_dir,
|
|
metric_hub_repos,
|
|
only,
|
|
use_cloud_function,
|
|
parallelism,
|
|
):
|
|
"""Generate lookml from namespaces."""
|
|
if metric_hub_repos:
|
|
MetricsConfigLoader.update_repos(metric_hub_repos)
|
|
glean_apps = _get_glean_apps(app_listings_uri)
|
|
|
|
dry_run_id_token = None
|
|
creds = None
|
|
if use_cloud_function:
|
|
dry_run_id_token = id_token()
|
|
else:
|
|
creds = credentials()
|
|
|
|
dryrun = DryRunContext(
|
|
use_cloud_function=use_cloud_function,
|
|
id_token=dry_run_id_token,
|
|
credentials=creds,
|
|
)
|
|
|
|
return _lookml(
|
|
namespaces,
|
|
glean_apps,
|
|
target_dir,
|
|
dryrun,
|
|
only,
|
|
parallelism,
|
|
metric_hub_repos,
|
|
)
|