Add examples for bqetl queries, format and dryrun

This commit is contained in:
Anna Scholtz 2021-03-24 15:53:20 -07:00
Родитель d0dd7e0dce
Коммит 5fd43bcd42
8 изменённых файлов: 185 добавлений и 26 удалений

Просмотреть файл

@ -6,7 +6,22 @@ import click
from bigquery_etl.alchemer.survey import get_survey_data, insert_to_bq
@click.group(help="Commands for importing alchemer data.")
@click.group(
help="""Commands for importing alchemer data.
Examples:
\b
# Import data from alchemer (surveygizmo) surveys into BigQuery.
# The date range is inclusive of the start and end values.
$ ./bqetl alchemer backfill --start-date=2021-01-01
--end-date=2021-02-01
--survey_id=xxxxxxxxxxx
--api_token=xxxxxxxxxxxxxx
--api_secret=xxxxxxxxxxxxxxx
--destination_table=moz-fx-data-shared-prod.telemetry_derived.survey_gizmo_daily_attitudes
"""
)
def alchemer():
"""Create the CLI group for the alchemer command.

The body is intentionally empty: this function only serves as the click
group that the alchemer subcommands (e.g. `backfill`) are invoked under.
"""
pass

Просмотреть файл

@ -39,12 +39,17 @@ def dag():
@dag.command(
help="""List all available DAGs
help="""Get information about available DAGs.
Examples:
# Get information about all available DAGs
./bqetl dag info
# Get information about a specific DAG
./bqetl dag info bqetl_ssl_ratios
# Get information about a specific DAG including scheduled tasks
./bqetl dag info --with_tasks bqetl_ssl_ratios
""",
)
@ -91,14 +96,28 @@ def info(name, dags_config, with_tasks):
@dag.command(
help="""Create a new DAG with name bqetl_<dag_name>, for example: bqetl_search
When creating new DAGs, the DAG name must have a `bqetl_` prefix.
Created DAGs are added to the `dags.yaml` file.
Examples:
Examples:
./bqetl dag create bqetl_core --schedule-interval="0 2 * * *"
--owner=example@mozilla.com
--description="Tables derived from `core` pings sent by mobile applications."
--start-date=2019-07-25
"""
\b
./bqetl dag create bqetl_core \\
--schedule-interval="0 2 * * *" \\
--owner=example@mozilla.com \\
--description="Tables derived from `core` pings sent by mobile applications." \\
--start-date=2019-07-25
\b
# Create DAG and overwrite default settings
./bqetl dag create bqetl_ssl_ratios --schedule-interval="0 2 * * *" \\
--owner=example@mozilla.com \\
--description="The DAG schedules SSL ratios queries." \\
--start-date=2019-07-20 \\
--email=example2@mozilla.com,example3@mozilla.com \\
--retries=2 \\
--retry_delay=30m
"""
)
@click.argument("name")
@dags_config_option
@ -188,8 +207,10 @@ def create(
Examples:
# Generate all DAGs
./bqetl dag generate
# Generate a specific DAG
./bqetl dag generate bqetl_ssl_ratios
"""
)
@ -216,11 +237,14 @@ def generate(name, dags_config, output_dir):
@dag.command(
help="""Remove a DAG
help="""Remove a DAG.
This will also remove the scheduling information from the queries that were scheduled
as part of the DAG.
Examples:
Examples:
./bqetl dag remove bqetl_vrbrowser
# Remove a specific DAG
./bqetl dag remove bqetl_vrbrowser
"""
)
@click.argument("name", required=False)

Просмотреть файл

@ -13,7 +13,18 @@ from ..dryrun import SKIP, DryRun
@click.command(
help="Dry run SQL.",
help="""Dry run SQL.
Uses the dryrun Cloud Function by default which only has access to shared-prod.
To dryrun queries accessing tables in another project, set
`--use-cloud-function=false` and ensure that the command line has access to a
GCP service account.
Examples:
./bqetl dryrun sql/moz-fx-data-shared-prod/telemetry_derived/
# Dry run SQL with tables that are not in shared-prod
./bqetl dryrun --use-cloud-function=false sql/moz-fx-data-marketing-prod/
""",
)
@click.argument(
"path",

Просмотреть файл

@ -6,7 +6,16 @@ from bigquery_etl.format_sql.format import format as format_sql
@click.command(
help="Format SQL.",
help="""Format SQL files.
Examples:
# Format all SQL files
./bqetl format
# Format a specific file
./bqetl format sql/moz-fx-data-shared-prod/telemetry/core/view.sql
""",
)
@click.argument(
"path",

Просмотреть файл

@ -83,10 +83,23 @@ def query():
@query.command(
help="Create a new query with name "
"<dataset>.<query_name>, for example: telemetry_derived.asn_aggregates. "
"Use the --project_id option to change the project the query is added to; "
"default is moz-fx-data-shared-prod",
help="""Create a new query with name
<dataset>.<query_name>, for example: telemetry_derived.asn_aggregates.
Use the `--project_id` option to change the project the query is added to;
default is `moz-fx-data-shared-prod`. Views are automatically generated
in the publicly facing dataset.
Examples:
\b
./bqetl query create telemetry_derived.deviations_v1 \\
--owner=example@mozilla.com
\b
# The query version gets autocompleted to v1. Queries are created in the
# _derived dataset and accompanying views in the public dataset.
./bqetl query create telemetry.deviations --owner=example@mozilla.com
""",
)
@click.argument("name")
@sql_dir_option
@ -204,7 +217,20 @@ def create(name, sql_dir, project_id, owner, init):
@query.command(
help="Schedule an existing query",
help="""Schedule an existing query.
Examples:
\b
./bqetl query schedule telemetry_derived.deviations_v1 \\
--dag=bqetl_deviations
\b
# Set a specific name for the task
./bqetl query schedule telemetry_derived.deviations_v1 \\
--dag=bqetl_deviations \\
--task-name=deviations
""",
)
@click.argument("name")
@sql_dir_option
@ -305,7 +331,19 @@ def schedule(name, sql_dir, project_id, dag, depends_on_past, task_name):
@query.command(
help="Get information about all or specific queries.",
help="""Get information about all or specific queries.
Examples:
\b
# Get info for specific queries
./bqetl query info telemetry_derived.*
\b
# Get cost and last update timestamp information
./bqetl query info telemetry_derived.clients_daily_v6 \\
--cost --last_updated
""",
)
@click.argument("name", required=False)
@sql_dir_option
@ -389,7 +427,24 @@ def info(name, sql_dir, project_id, cost, last_updated):
@query.command(
help="Run a backfill for a query. Additional parameters will get passed to bq.",
help="""Run a backfill for a query. Additional parameters will get passed to bq.
Examples:
\b
# Backfill for specific date range
./bqetl query backfill telemetry_derived.ssl_ratios_v1 \\
--start_date=2021-03-01 \\
--end_date=2021-03-31
\b
# Dryrun backfill for specific date range and exclude date
./bqetl query backfill telemetry_derived.ssl_ratios_v1 \\
--start_date=2021-03-01 \\
--end_date=2021-03-31 \\
--exclude=2021-03-03 \\
--dry_run
""",
context_settings=dict(
ignore_unknown_options=True,
allow_extra_args=True,
@ -496,7 +551,20 @@ def backfill(
@query.command(
help="Validate a query.",
help="""Validate a query.
Checks formatting, scheduling information and dry runs the query.
Examples:
./bqetl query validate telemetry_derived.clients_daily_v6
\b
# Validate query not in shared-prod
./bqetl query validate \\
--use_cloud_function=false \\
--project_id=moz-fx-data-marketing-prod \\
ga_derived.blogs_goals_v1
""",
)
@click.argument("name", required=False)
@sql_dir_option
@ -534,7 +602,13 @@ def validate(ctx, name, sql_dir, project_id, use_cloud_function):
@query.command(
help="Create and initialize the destination table for the query.",
help="""Create and initialize the destination table for the query.
Only for queries that have an `init.sql` file.
Examples:
./bqetl query initialize telemetry_derived.ssl_ratios_v1
""",
)
@click.argument("name")
@sql_dir_option
@ -576,7 +650,13 @@ def schema():
@schema.command(
help="Update the query schema",
help="""Update the query schema based on the destination table schema and the query schema.
If no schema.yaml file exists for a query, one will be created.
Examples:
./bqetl query schema update telemetry_derived.clients_daily_v6
""",
)
@click.argument("name")
@sql_dir_option
@ -666,7 +746,12 @@ def update(name, sql_dir, project_id):
@schema.command(
help="Deploy the query schema",
help="""Deploy the query schema.
Examples:
./bqetl query schema deploy telemetry_derived.clients_daily_v6
""",
)
@click.argument("name")
@sql_dir_option
@ -804,7 +889,15 @@ def _validate_schema(query_file):
return True
@schema.command(help="Validate the query schema", name="validate")
@schema.command(
help="""Validate the query schema.
Examples:
./bqetl query schema validate telemetry_derived.clients_daily_v6
""",
name="validate",
)
@click.argument("name")
@sql_dir_option
@click.option(

Просмотреть файл

@ -1,3 +1,5 @@
"""Generate docs for bqetl commands."""
import click
from jinja2 import Environment, FileSystemLoader
import os

Просмотреть файл

@ -16,11 +16,13 @@ $ ./bqetl {{ command_group.name }} {{ command.name }} [OPTIONS]
{{ "" }} [{{ arg.name }}]
{%- endfor %}
{% if command.options | length > 1 -%}
Options:
{% for option in command.options -%}
--{{ option.name }}: {{ option.description }}
{% endfor -%}
{% endif %}
```
{% if command.examples -%}
@ -41,11 +43,13 @@ $ ./bqetl {{ command_group.name }} [OPTIONS]
{{ "" }}[{{ arg.name }}]
{%- endfor %}
{% if command_group.options | length > 1 -%}
Options:
{% for option in command_group.options -%}
--{{ option.name }}: {{ option.description }}
{% endfor -%}
{% endif -%}
```
{% if command_group.examples -%}

Просмотреть файл

@ -41,4 +41,5 @@ nav:
- ... | mozdata/**.md
- UDFs:
- ... | mozfun/**.md
- bqetl CLI: bqetl.md
- Reference:
- bqetl CLI: bqetl.md