From 95199bba4081dd22954f73933e367e7a642e2f09 Mon Sep 17 00:00:00 2001 From: Ben Wu <12437227+BenWu@users.noreply.github.com> Date: Thu, 9 May 2024 17:59:59 -0400 Subject: [PATCH] [Bug 1892284] Run events_stream_v1 queries in moz-fx-data-backfill-2 (#5543) --- bigquery_etl/cli/query.py | 11 +++++------ .../templates/events_stream_v1.metadata.yaml | 2 ++ tests/test_run_query.py | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/bigquery_etl/cli/query.py b/bigquery_etl/cli/query.py index 1f8d02de36..853011de22 100644 --- a/bigquery_etl/cli/query.py +++ b/bigquery_etl/cli/query.py @@ -870,12 +870,6 @@ def _run_query( billing_project is the project to run the query in for the purposes of billing and slot reservation selection. This is project_id if billing_project is not set """ - # if billing_project is set, default dataset is set with the @@dataset_id variable instead - if dataset_id is not None and billing_project is None: - # dataset ID was parsed by argparse but needs to be passed as parameter - # when running the query - query_arguments.append(f"--dataset_id={dataset_id}") - if billing_project is not None: query_arguments.append(f"--project_id={billing_project}") elif project_id is not None: @@ -962,6 +956,11 @@ def _run_query( default_dataset=dataset_id or default_dataset, ) query_arguments.append(f"--session_id={session_id}") + # if billing_project is set, default dataset is set with the @@dataset_id variable instead + elif dataset_id is not None: + # dataset ID was parsed by argparse but needs to be passed as parameter + # when running the query + query_arguments.append(f"--dataset_id={dataset_id}") # write rendered query to a temporary file; # query string cannot be passed directly to bq as SQL comments will be interpreted as CLI arguments diff --git a/sql_generators/glean_usage/templates/events_stream_v1.metadata.yaml b/sql_generators/glean_usage/templates/events_stream_v1.metadata.yaml index e017ce7af3..8f587fa55d 100644 --- a/sql_generators/glean_usage/templates/events_stream_v1.metadata.yaml +++ b/sql_generators/glean_usage/templates/events_stream_v1.metadata.yaml @@ -14,6 +14,8 @@ labels: scheduling: dag_name: bqetl_glean_usage task_group: {{ app_name }} + # Use backfill-2 project for on-demand query billing + arguments: ["--billing-project", "moz-fx-data-backfill-2"] bigquery: time_partitioning: type: day diff --git a/tests/test_run_query.py b/tests/test_run_query.py index 0406952f28..7fa9147a35 100644 --- a/tests/test_run_query.py +++ b/tests/test_run_query.py @@ -37,8 +37,8 @@ class TestRunQuery: [ "bq", "query", - "--dataset_id=test", "--destination_table=query_v1", + "--dataset_id=test", ], ) assert "stdin" in mock_call.call_args.kwargs @@ -76,8 +76,8 @@ class TestRunQuery: [ "bq", "query", - "--dataset_id=test", "--destination_table=mozdata:test.query_v1", + "--dataset_id=test", ], ) assert "stdin" in mock_call.call_args.kwargs @@ -112,8 +112,8 @@ class TestRunQuery: [ "bq", "query", - "--dataset_id=test", "--destination_table=mozilla-public-data:test.query_v1", + "--dataset_id=test", ], )