This commit is contained in:
Anna Scholtz 2020-09-30 13:28:23 -07:00
Родитель 01880f7687
Коммит a16bc2824f
6 изменённых файлов: 9 добавлений и 10 удалений

Просмотреть файл

@@ -96,21 +96,21 @@ Recommended practices
### Queries
- Should be defined in files named as `<project>/<dataset>/<table>_<version>/query.sql` e.g.
- Should be defined in files named as `sql/<project>/<dataset>/<table>_<version>/query.sql` e.g.
`sql/moz-fx-data-shared-prod/telemetry_derived/clients_daily_v7/query.sql`
- Queries that populate tables should always be named with a version suffix;
we assume that future optimizations to the data representation may require
schema-incompatible changes such as dropping columns
- May be generated using a python script that prints the query to stdout
- Should save output as `<project>/<dataset>/<table>_<version>/query.sql` as above
- Should be named as `<project>/query_type.sql.py` e.g. `sql/moz-fx-data-shared-prod/clients_daily.sql.py`
- Should save output as `sql/<project>/<dataset>/<table>_<version>/query.sql` as above
- Should be named as `sql/<project>/query_type.sql.py` e.g. `sql/moz-fx-data-shared-prod/clients_daily.sql.py`
- May use options to generate queries for different destination tables e.g.
using `--source telemetry_core_parquet_v3` to generate
`sql/moz-fx-data-shared-prod/telemetry/core_clients_daily_v1/query.sql` and using `--source main_summary_v4` to
generate `sql/moz-fx-data-shared-prod/telemetry/clients_daily_v7/query.sql`
- Should output a header indicating options used e.g.
```sql
-- Query generated by: moz-fx-data-shared-prod/clients_daily.sql.py --source telemetry_core_parquet
-- Query generated by: sql/moz-fx-data-shared-prod/clients_daily.sql.py --source telemetry_core_parquet
```
- Should not specify a project or dataset in table names to simplify testing
- Should be [incremental]
@@ -126,7 +126,7 @@ Recommended practices
### Views
- Should be defined in files named as `<project>/<dataset>/<table>/view.sql` e.g.
- Should be defined in files named as `sql/<project>/<dataset>/<table>/view.sql` e.g.
`sql/moz-fx-data-shared-prod/telemetry/core/view.sql`
- Views should generally _not_ be named with a version suffix; a view represents a
stable interface for users and whenever possible should maintain compatibility

Просмотреть файл

@@ -10,7 +10,7 @@ from bigquery_etl.format_sql.format import format as format_sql
)
@click.argument(
"path",
default="sql/", # todo: apply formatting to all projects
default="sql/",
type=click.Path(file_okay=True),
)
def format(path):

Просмотреть файл

@@ -51,7 +51,7 @@ def sql_for_dry_run(file, parsed_udfs, project_dir):
dry_run_sql = dry_run_sql.replace(udf, udf.replace(".", "_"))
# remove explicit project references
dry_run_sql = dry_run_sql.replace(project_dir + ".", "")
dry_run_sql = dry_run_sql.replace(os.path.basename(project_dir) + ".", "")
return dry_run_sql

Просмотреть файл

@@ -217,7 +217,7 @@ class Task:
else:
raise ValueError(
"query_file must be a path with format:"
" <project>/<dataset>/<table>_<version>/(query.sql|part1.sql)"
" sql/<project>/<dataset>/<table>_<version>/(query.sql|part1.sql)"
f" but is {self.query_file}"
)

Просмотреть файл

@@ -18,7 +18,7 @@ from bigquery_etl.util.common import project_dirs
UDF_DIRS = ("udf", "udf_js")
MOZFUN_DIR = ("mozfun",)
MOZFUN_DIR = ("sql/mozfun",)
UDF_CHAR = "[a-zA-z0-9_]"
UDF_FILE = "udf.sql"
PROCEDURE_FILE = "stored_procedure.sql"

Просмотреть файл

@@ -64,7 +64,6 @@ def main():
parser.error(f"argument --log-level: {e}")
client = bigquery.Client()
# todo: support other projects
tables = get_tables_matching_patterns(client, args.patterns)
views = {}