Move routine config to bqetl_project.yaml (#4038)
This commit is contained in:
Родитель
8ccd05433a
Коммит
8d72cfa9fe
|
@ -14,7 +14,13 @@ import pytest
|
|||
import yaml
|
||||
|
||||
from ..cli.format import format
|
||||
from ..cli.utils import is_authenticated, is_valid_dir, is_valid_project
|
||||
from ..cli.utils import (
|
||||
is_authenticated,
|
||||
is_valid_project,
|
||||
project_id_option,
|
||||
sql_dir_option,
|
||||
)
|
||||
from ..config import ConfigLoader
|
||||
from ..docs import validate as validate_docs
|
||||
from ..format_sql.formatter import reformat
|
||||
from ..routine import publish_routines
|
||||
|
@ -27,10 +33,6 @@ ROUTINE_FILE_RE = re.compile(
|
|||
r"^.*/([a-zA-Z0-9-]+)/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+)/"
|
||||
r"(udf\.sql|stored_procedure\.sql)$"
|
||||
)
|
||||
DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
|
||||
DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"
|
||||
DEFAULT_GCS_PATH = ""
|
||||
DEFAULT_PROJECT_ID = "moz-fx-data-shared-prod"
|
||||
|
||||
|
||||
def _routines_matching_name_pattern(pattern, sql_path, project_id):
|
||||
|
@ -57,22 +59,6 @@ def _routines_matching_name_pattern(pattern, sql_path, project_id):
|
|||
return routine_files
|
||||
|
||||
|
||||
sql_dir_option = click.option(
|
||||
"--sql_dir",
|
||||
help="Path to directory which contains queries.",
|
||||
type=click.Path(file_okay=False),
|
||||
default="sql",
|
||||
callback=is_valid_dir,
|
||||
)
|
||||
|
||||
project_id_option = click.option(
|
||||
"--project-id",
|
||||
"--project_id",
|
||||
help="GCP project ID",
|
||||
callback=lambda *args: is_valid_project(*args) if args[-1] else args[-1],
|
||||
)
|
||||
|
||||
|
||||
def get_project_id(ctx, project_id=None):
|
||||
"""Return the project id with the option flag taking priority."""
|
||||
if project_id:
|
||||
|
@ -92,7 +78,7 @@ def get_project_id(ctx, project_id=None):
|
|||
def routine(ctx):
|
||||
"""Create the CLI group for the routine command."""
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj["DEFAULT_PROJECT"] = "moz-fx-data-shared-prod"
|
||||
ctx.obj["DEFAULT_PROJECT"] = ConfigLoader.get("default", "project")
|
||||
|
||||
|
||||
@click.group(help="Commands for managing public mozfun routines.")
|
||||
|
@ -100,7 +86,7 @@ def routine(ctx):
|
|||
def mozfun(ctx):
|
||||
"""Create the CLI group for the mozfun command."""
|
||||
ctx.ensure_object(dict)
|
||||
ctx.obj["DEFAULT_PROJECT"] = "mozfun"
|
||||
ctx.obj["DEFAULT_PROJECT"] = ConfigLoader.get("routine", "project")
|
||||
|
||||
|
||||
@routine.command(
|
||||
|
@ -124,7 +110,7 @@ def mozfun(ctx):
|
|||
)
|
||||
@click.argument("name")
|
||||
@sql_dir_option
|
||||
@project_id_option
|
||||
@project_id_option()
|
||||
@click.option("--udf", "-u", is_flag=True, help="Create a new UDF", default=False)
|
||||
@click.option(
|
||||
"--stored_procedure",
|
||||
|
@ -270,7 +256,7 @@ Examples:
|
|||
)
|
||||
@click.argument("name", required=False)
|
||||
@sql_dir_option
|
||||
@project_id_option
|
||||
@project_id_option()
|
||||
@click.option("--usages", "-u", is_flag=True, help="Show routine usages", default=False)
|
||||
@click.pass_context
|
||||
def info(ctx, name, sql_dir, project_id, usages):
|
||||
|
@ -352,7 +338,7 @@ Examples:
|
|||
)
|
||||
@click.argument("name", required=False)
|
||||
@sql_dir_option
|
||||
@project_id_option
|
||||
@project_id_option()
|
||||
@click.option(
|
||||
"--docs-only",
|
||||
"--docs_only",
|
||||
|
@ -410,23 +396,23 @@ Examples:
|
|||
""",
|
||||
)
|
||||
@click.argument("name", required=False)
|
||||
@project_id_option
|
||||
@project_id_option()
|
||||
@click.option(
|
||||
"--dependency-dir",
|
||||
"--dependency_dir",
|
||||
default=DEFAULT_UDF_DEPENDENCY_DIR,
|
||||
default=ConfigLoader.get("routine", "dependency_dir"),
|
||||
help="The directory JavaScript dependency files for UDFs are stored.",
|
||||
)
|
||||
@click.option(
|
||||
"--gcs-bucket",
|
||||
"--gcs_bucket",
|
||||
default=DEFAULT_GCS_BUCKET,
|
||||
default=ConfigLoader.get("routine", "publish", "gcs_bucket"),
|
||||
help="The GCS bucket where dependency files are uploaded to.",
|
||||
)
|
||||
@click.option(
|
||||
"--gcs-path",
|
||||
"--gcs_path",
|
||||
default=DEFAULT_GCS_PATH,
|
||||
default=ConfigLoader.get("routine", "publish", "gcs_path"),
|
||||
help="The GCS path in the bucket where dependency files are uploaded to.",
|
||||
)
|
||||
@click.option(
|
||||
|
@ -484,7 +470,7 @@ by Airflow only."""
|
|||
@click.argument("name", required=True)
|
||||
@click.argument("new_name", required=True)
|
||||
@sql_dir_option
|
||||
@project_id_option
|
||||
@project_id_option()
|
||||
@click.pass_context
|
||||
def rename(ctx, name, new_name, sql_dir, project_id):
|
||||
"""Rename routines based on pattern."""
|
||||
|
|
|
@ -10,6 +10,7 @@ import click
|
|||
from google.auth.exceptions import DefaultCredentialsError
|
||||
from google.cloud import bigquery
|
||||
|
||||
from bigquery_etl.config import ConfigLoader
|
||||
from bigquery_etl.util.common import TempDatasetReference, project_dirs
|
||||
|
||||
QUERY_FILE_RE = re.compile(
|
||||
|
@ -20,7 +21,6 @@ CHECKS_FILE_RE = re.compile(
|
|||
r"^.*/([a-zA-Z0-9-]+)/([a-zA-Z0-9_]+)/([a-zA-Z0-9_]+(_v[0-9]+)?)/"
|
||||
r"(?:checks\.sql)$"
|
||||
)
|
||||
TEST_PROJECT = "bigquery-etl-integration-test"
|
||||
MOZDATA = "mozdata"
|
||||
PIONEER_NONPROD = "moz-fx-data-pioneer-nonprod"
|
||||
PIONEER_PROD = "moz-fx-data-pioneer-prod"
|
||||
|
@ -57,7 +57,7 @@ def is_valid_project(ctx, param, value):
|
|||
or value
|
||||
in [Path(p).name for p in project_dirs()]
|
||||
+ [
|
||||
TEST_PROJECT,
|
||||
ConfigLoader.get("default", "test_project"),
|
||||
MOZDATA,
|
||||
PIONEER_NONPROD,
|
||||
PIONEER_PROD,
|
||||
|
@ -149,7 +149,7 @@ sql_dir_option = click.option(
|
|||
"--sql-dir",
|
||||
help="Path to directory which contains queries.",
|
||||
type=click.Path(file_okay=False),
|
||||
default="sql",
|
||||
default=ConfigLoader.get("default", "sql_dir", fallback="sql"),
|
||||
callback=is_valid_dir,
|
||||
)
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ class DryRun:
|
|||
}
|
||||
|
||||
# update skip list to include renamed queries in stage.
|
||||
test_project = ConfigLoader.get("dry_run", "test_project", fallback="")
|
||||
test_project = ConfigLoader.get("default", "test_project", fallback="")
|
||||
file_pattern_re = re.compile(r"sql/([^\/]+)/([^/]+)(/?.*|$)")
|
||||
skip_files.update(
|
||||
[
|
||||
|
|
|
@ -7,6 +7,8 @@ from argparse import ArgumentParser
|
|||
|
||||
import click
|
||||
|
||||
from bigquery_etl.config import ConfigLoader
|
||||
|
||||
from ..util import standard_args
|
||||
from ..util.common import project_dirs
|
||||
from .parse_metadata import DatasetMetadata, Metadata
|
||||
|
@ -37,7 +39,7 @@ def validate_change_control(
|
|||
metadata,
|
||||
codeowners_file,
|
||||
project_id="moz-fx-data-shared-prod",
|
||||
sql_dir="sql",
|
||||
sql_dir=ConfigLoader.get("default", "sql_dir", fallback="sql"),
|
||||
):
|
||||
"""Verify that a query is correctly setup for change control."""
|
||||
path_to_add = file_path.partition(f"{project_id}/")[2]
|
||||
|
|
|
@ -14,6 +14,7 @@ import attr
|
|||
import sqlparse
|
||||
import yaml
|
||||
|
||||
from bigquery_etl.config import ConfigLoader
|
||||
from bigquery_etl.metadata.parse_metadata import METADATA_FILE
|
||||
from bigquery_etl.util.common import render
|
||||
|
||||
|
@ -21,7 +22,6 @@ UDF_CHAR = "[a-zA-Z0-9_]"
|
|||
UDF_FILE = "udf.sql"
|
||||
PROCEDURE_FILE = "stored_procedure.sql"
|
||||
ROUTINE_FILE = (UDF_FILE, PROCEDURE_FILE)
|
||||
EXAMPLE_DIR = "examples"
|
||||
TEMP_UDF_RE = re.compile(f"(?:udf|assert)_{UDF_CHAR}+")
|
||||
PERSISTENT_UDF_PREFIX_RE_STR = (
|
||||
r"CREATE\s+(?:OR\s+REPLACE\s+)?(?:FUNCTION|PROCEDURE)(?:\s+IF\s+NOT\s+EXISTS)?"
|
||||
|
@ -33,8 +33,6 @@ PERSISTENT_UDF_RE = re.compile(
|
|||
)
|
||||
UDF_NAME_RE = re.compile(r"^([a-zA-Z0-9_]+\.)?[a-zA-Z][a-zA-Z0-9_]{0,255}$")
|
||||
GENERIC_DATASET = "_generic_dataset_"
|
||||
SQL_DIR = Path("sql/")
|
||||
ASSERT_UDF_DIR = "tests/assert"
|
||||
|
||||
raw_routines = {}
|
||||
|
||||
|
@ -57,8 +55,10 @@ def get_routines(project):
|
|||
"""Return all routines that could be referenced by the project."""
|
||||
return (
|
||||
get_routines_from_dir(project)
|
||||
+ get_routines_from_dir(SQL_DIR / "mozfun")
|
||||
+ get_routines_from_dir(ASSERT_UDF_DIR)
|
||||
+ get_routines_from_dir(
|
||||
Path(ConfigLoader.get("default", "sql_dir", fallback="sql")) / "mozfun"
|
||||
)
|
||||
+ get_routines_from_dir(ConfigLoader.get("routine", "assert_udf_dir"))
|
||||
) # assert UDFs used for testing
|
||||
|
||||
|
||||
|
@ -227,14 +227,17 @@ def read_routine_dir(*project_dirs):
|
|||
global raw_routines
|
||||
|
||||
if not project_dirs:
|
||||
project_dirs = (SQL_DIR, ASSERT_UDF_DIR)
|
||||
project_dirs = (
|
||||
ConfigLoader.get("default", "sql_dir"),
|
||||
ConfigLoader.get("routine", "assert_udf_dir"),
|
||||
)
|
||||
|
||||
if project_dirs not in raw_routines:
|
||||
raw_routines[project_dirs] = {
|
||||
raw_routine.name: raw_routine
|
||||
for project_dir in project_dirs
|
||||
for root, dirs, files in os.walk(project_dir)
|
||||
if os.path.basename(root) != EXAMPLE_DIR
|
||||
if os.path.basename(root) != ConfigLoader.get("routine", "example_dir")
|
||||
for filename in files
|
||||
if filename in ROUTINE_FILE
|
||||
for raw_routine in (RawRoutine.from_file(os.path.join(root, filename)),)
|
||||
|
@ -246,7 +249,11 @@ def read_routine_dir(*project_dirs):
|
|||
def parse_routines(project_dir):
|
||||
"""Read routine contents of the project dir into ParsedRoutine instances."""
|
||||
# collect udfs to parse
|
||||
raw_routines = read_routine_dir(project_dir, SQL_DIR / "mozfun", ASSERT_UDF_DIR)
|
||||
raw_routines = read_routine_dir(
|
||||
project_dir,
|
||||
Path(ConfigLoader.get("default", "sql_dir", fallback="sql")) / "mozfun",
|
||||
ConfigLoader.get("routine", "assert_udf_dir"),
|
||||
)
|
||||
|
||||
# prepend udf definitions to tests
|
||||
for raw_routine in raw_routines.values():
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
"""Publish UDFs and resources to the public mozfun GCP project."""
|
||||
|
||||
import fnmatch
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
@ -9,20 +10,14 @@ from argparse import ArgumentParser
|
|||
from google.cloud import storage # type: ignore
|
||||
from google.cloud import bigquery
|
||||
|
||||
from bigquery_etl.config import ConfigLoader
|
||||
from bigquery_etl.routine.parse_routine import accumulate_dependencies, read_routine_dir
|
||||
from bigquery_etl.util import standard_args
|
||||
from bigquery_etl.util.common import project_dirs
|
||||
|
||||
DEFAULT_UDF_DEPENDENCY_DIR = "udf_js_lib/"
|
||||
DEFAULT_GCS_BUCKET = "moz-fx-data-prod-bigquery-etl"
|
||||
DEFAULT_GCS_PATH = ""
|
||||
DEFAULT_PROJECT = "sql/moz-fx-data-shared-prod"
|
||||
SQL_DIR = "sql/"
|
||||
|
||||
OPTIONS_LIB_RE = re.compile(r'library = "gs://[^"]+/([^"]+)"')
|
||||
OPTIONS_RE = re.compile(r"OPTIONS(\n|\s)*\(")
|
||||
|
||||
SKIP = ["sql/moz-fx-data-shared-prod/udf/main_summary_scalars/udf.sql"]
|
||||
|
||||
parser = ArgumentParser(description=__doc__)
|
||||
parser.add_argument(
|
||||
|
@ -34,26 +29,27 @@ parser.add_argument(
|
|||
)
|
||||
parser.add_argument(
    "--target",
    # Default to the default project's directory inside the SQL directory.
    # os.path.join keeps the path valid whether or not the configured
    # sql_dir ends with a path separator (plain string concatenation would
    # produce e.g. "sqlmoz-fx-data-shared-prod" for sql_dir "sql").
    default=os.path.join(
        ConfigLoader.get("default", "sql_dir", fallback="sql/"),
        ConfigLoader.get("default", "project", fallback="moz-fx-data-shared-prod"),
    ),
    required=False,
    help="Path to project directory.",
)
|
||||
parser.add_argument(
|
||||
"--dependency-dir",
|
||||
"--dependency_dir",
|
||||
default=DEFAULT_UDF_DEPENDENCY_DIR,
|
||||
default=ConfigLoader.get("routine", "dependency_dir"),
|
||||
help="The directory JavaScript dependency files for UDFs are stored.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gcs-bucket",
|
||||
"--gcs_bucket",
|
||||
default=DEFAULT_GCS_BUCKET,
|
||||
default=ConfigLoader.get("routine", "publish", "gcs_bucket"),
|
||||
help="The GCS bucket where dependency files are uploaded to.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gcs-path",
|
||||
"--gcs_path",
|
||||
default=DEFAULT_GCS_PATH,
|
||||
default=ConfigLoader.get("routine", "publish", "gcs_path"),
|
||||
help="The GCS path in the bucket where dependency files are uploaded to.",
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -83,7 +79,7 @@ def main():
|
|||
publish(
|
||||
args.target,
|
||||
args.project_id,
|
||||
os.path.join(SQL_DIR, project, args.dependency_dir),
|
||||
os.path.join(ConfigLoader.get("sql_dir"), project, args.dependency_dir),
|
||||
args.gcs_bucket,
|
||||
args.gcs_path,
|
||||
args.public,
|
||||
|
@ -91,6 +87,18 @@ def main():
|
|||
)
|
||||
|
||||
|
||||
def skipped_routines():
    """Return the set of routine files that must not be published.

    Every entry of the ``routine.publish.skip`` list in the project config is
    treated as a glob pattern and expanded recursively (so ``**`` is
    honoured). Relative patterns are resolved by ``glob`` against the current
    working directory.

    Returns:
        set: paths of all files matched by at least one skip pattern; empty
        when no skip list is configured.
    """
    # Publishing has its own skip list; ``dry_run.skip`` is a separate list
    # and must not decide which routines get published.
    skip_patterns = ConfigLoader.get("routine", "publish", "skip", fallback=[])
    return {
        path
        for pattern in skip_patterns
        for path in glob.glob(pattern, recursive=True)
    }
|
||||
|
||||
|
||||
def publish(
|
||||
target,
|
||||
project_id,
|
||||
|
@ -123,7 +131,10 @@ def publish(
|
|||
udfs_to_publish.append(raw_routine)
|
||||
|
||||
for dep in udfs_to_publish:
|
||||
if dep not in published_routines and raw_routines[dep].filepath not in SKIP:
|
||||
if (
|
||||
dep not in published_routines
|
||||
and raw_routines[dep].filepath not in skipped_routines()
|
||||
):
|
||||
publish_routine(
|
||||
raw_routines[dep],
|
||||
client,
|
||||
|
@ -185,7 +196,10 @@ def publish_routine(
|
|||
)
|
||||
|
||||
# add UDF descriptions
|
||||
if raw_routine.filepath not in SKIP and not raw_routine.is_stored_procedure:
|
||||
if (
|
||||
raw_routine.filepath not in skipped_routines()
|
||||
and not raw_routine.is_stored_procedure
|
||||
):
|
||||
# descriptions need to be escaped since quotation marks and other
|
||||
# characters, such as \x01, will make the query invalid otherwise
|
||||
escaped_description = json.dumps(str(raw_routine.description))
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
default:
|
||||
project: moz-fx-data-shared-prod
|
||||
sql_dir: sql/
|
||||
test_project: bigquery-etl-integration-test
|
||||
|
||||
dry_run:
|
||||
function: https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/bigquery-etl-dryrun
|
||||
test_project: bigquery-etl-integration-test
|
||||
skip:
|
||||
# Access Denied
|
||||
- sql/moz-fx-data-shared-prod/account_ecosystem_derived/ecosystem_client_id_lookup_v1/query.sql
|
||||
|
@ -301,3 +305,14 @@ format:
|
|||
- sql/moz-fx-data-shared-prod/udf_legacy/date_trunc.sql
|
||||
- sql/moz-fx-data-shared-prod/udf_legacy/to_iso8601.sql
|
||||
- stored_procedures/safe_crc32_uuid.sql
|
||||
|
||||
routine:
|
||||
dependency_dir: udf_js_lib/
|
||||
publish:
|
||||
gcs_bucket: moz-fx-data-prod-bigquery-etl
|
||||
gcs_path: ""
|
||||
skip:
|
||||
- sql/moz-fx-data-shared-prod/udf/main_summary_scalars/udf.sql
|
||||
assert_udf_dir: tests/assert
|
||||
example_dir: examples
|
||||
project: mozfun
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
default:
|
||||
test_project: bigquery-etl-integration-test
|
||||
|
||||
dry_run:
|
||||
function: https://us-central1-moz-fx-data-shared-prod.cloudfunctions.net/bigquery-etl-dryrun
|
||||
test_project: bigquery-etl-integration-test
|
||||
skip:
|
||||
# Access Denied
|
||||
- sql/moz-fx-data-shared-prod/test_derived/some_query_v1/query.sql
|
||||
|
|
|
@ -14,14 +14,14 @@ class TestConfig:
|
|||
ConfigLoader.set_project_dir(TEST_DIR / "data")
|
||||
|
||||
assert "function" in ConfigLoader.get("dry_run")
|
||||
assert "test_project" in ConfigLoader.get("dry_run")
|
||||
assert "skip" in ConfigLoader.get("dry_run")
|
||||
|
||||
assert (
|
||||
ConfigLoader.get("dry_run", "test_project")
|
||||
ConfigLoader.get("default", "test_project")
|
||||
== "bigquery-etl-integration-test"
|
||||
)
|
||||
assert len(ConfigLoader.get("dry_run", "skip")) == 2
|
||||
|
||||
assert len(ConfigLoader.get("dry_run", "skip")) > 0
|
||||
|
||||
assert "dry_run" in ConfigLoader.get()
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче