diff --git a/Dockerfile b/Dockerfile index f67037de18..987b992f5f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,8 +21,8 @@ RUN pip install --no-deps -r requirements.txt FROM google/cloud-sdk:${GOOGLE_CLOUD_SDK_VERSION}-alpine AS google-cloud-sdk FROM base -# add bash for entrypoint and jdk for jni access to zetasql -RUN mkdir -p /usr/share/man/man1 && apt-get update -qqy && apt-get install -qqy bash default-jdk-headless git +# add bash for entrypoint +RUN mkdir -p /usr/share/man/man1 && apt-get update -qqy && apt-get install -qqy bash git COPY --from=google-cloud-sdk /google-cloud-sdk /google-cloud-sdk ENV PATH /google-cloud-sdk/bin:$PATH COPY --from=python-deps /usr/local /usr/local diff --git a/bigquery_etl/query_scheduling/task.py b/bigquery_etl/query_scheduling/task.py index 0b5f238fda..962b814284 100644 --- a/bigquery_etl/query_scheduling/task.py +++ b/bigquery_etl/query_scheduling/task.py @@ -438,7 +438,7 @@ class Task: ) def _get_referenced_tables(self): - """Use zetasql to get tables the query depends on.""" + """Use sqlglot to get tables the query depends on.""" logging.info(f"Get dependencies for {self.task_key}") if self.is_python_script: diff --git a/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/subscriptions_v1/query.sql b/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/subscriptions_v1/query.sql index ef35bfec1a..0dbd97885c 100644 --- a/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/subscriptions_v1/query.sql +++ b/sql/moz-fx-cjms-nonprod-9a36/cjms_bigquery/subscriptions_v1/query.sql @@ -74,9 +74,6 @@ initial_invoices AS ( `moz-fx-data-shared-prod`.stripe_external.nonprod_invoice_v1 AS invoices USING (subscription_id) - -- ZetaSQL requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING. - WHERE - TRUE QUALIFY 1 = ROW_NUMBER() OVER (PARTITION BY subscription_id ORDER BY invoices.created) ), diff --git a/sql/moz-fx-cjms-prod-f3c7/cjms_bigquery/subscriptions_v1/query.sql b/sql/moz-fx-cjms-prod-f3c7/cjms_bigquery/subscriptions_v1/query.sql index 08ec49eef2..e193b3fc8a 100644 --- a/sql/moz-fx-cjms-prod-f3c7/cjms_bigquery/subscriptions_v1/query.sql +++ b/sql/moz-fx-cjms-prod-f3c7/cjms_bigquery/subscriptions_v1/query.sql @@ -59,9 +59,6 @@ initial_invoices AS ( `moz-fx-data-shared-prod`.stripe_external.invoice_v1 AS invoices USING (subscription_id) - -- ZetaSQL requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING. - WHERE - TRUE QUALIFY 1 = ROW_NUMBER() OVER (PARTITION BY subscription_id ORDER BY invoices.created) ), diff --git a/sql/moz-fx-data-shared-prod/hubs_derived/subscription_events_live/view.sql b/sql/moz-fx-data-shared-prod/hubs_derived/subscription_events_live/view.sql index 5ad1ab2cf3..0d22ac86b2 100644 --- a/sql/moz-fx-data-shared-prod/hubs_derived/subscription_events_live/view.sql +++ b/sql/moz-fx-data-shared-prod/hubs_derived/subscription_events_live/view.sql @@ -49,8 +49,6 @@ new_events AS ( "New" AS event_type, FROM `moz-fx-data-shared-prod`.hubs.active_subscription_ids - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LAG(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date - 1 @@ -65,8 +63,6 @@ cancelled_events AS ( `moz-fx-data-shared-prod`.hubs.active_subscription_ids CROSS JOIN max_active_date - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LEAD(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date + 1 diff --git a/sql/moz-fx-data-shared-prod/mozilla_vpn_derived/subscription_events_live/view.sql b/sql/moz-fx-data-shared-prod/mozilla_vpn_derived/subscription_events_live/view.sql index c0d45f1003..f7e63fbe58 100644 --- a/sql/moz-fx-data-shared-prod/mozilla_vpn_derived/subscription_events_live/view.sql +++ b/sql/moz-fx-data-shared-prod/mozilla_vpn_derived/subscription_events_live/view.sql @@ -49,8 +49,6 @@ new_events AS ( "New" AS event_type, FROM `moz-fx-data-shared-prod`.mozilla_vpn.active_subscription_ids - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LAG(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date - 1 @@ -65,8 +63,6 @@ cancelled_events AS ( `moz-fx-data-shared-prod`.mozilla_vpn.active_subscription_ids CROSS JOIN max_active_date - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LEAD(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date + 1 diff --git a/sql/moz-fx-data-shared-prod/relay_derived/subscription_events_live/view.sql b/sql/moz-fx-data-shared-prod/relay_derived/subscription_events_live/view.sql index bd7973c32d..ea9be6573b 100644 --- a/sql/moz-fx-data-shared-prod/relay_derived/subscription_events_live/view.sql +++ b/sql/moz-fx-data-shared-prod/relay_derived/subscription_events_live/view.sql @@ -49,8 +49,6 @@ new_events AS ( "New" AS event_type, FROM `moz-fx-data-shared-prod`.relay.active_subscription_ids - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LAG(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date - 1 @@ -65,8 +63,6 @@ cancelled_events AS ( `moz-fx-data-shared-prod`.relay.active_subscription_ids CROSS JOIN max_active_date - WHERE - TRUE -- zetasql requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING QUALIFY LEAD(active_date) OVER (PARTITION BY subscription_id ORDER BY active_date) IS DISTINCT FROM ( active_date + 1 diff --git a/sql/moz-fx-data-shared-prod/subscription_platform_derived/nonprod_stripe_subscriptions_history_v1/query.sql b/sql/moz-fx-data-shared-prod/subscription_platform_derived/nonprod_stripe_subscriptions_history_v1/query.sql index be2b4a070e..f200655b97 100644 --- a/sql/moz-fx-data-shared-prod/subscription_platform_derived/nonprod_stripe_subscriptions_history_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/subscription_platform_derived/nonprod_stripe_subscriptions_history_v1/query.sql @@ -119,9 +119,6 @@ subscription_items AS ( plan_id, FROM `moz-fx-data-shared-prod`.stripe_external.nonprod_subscription_item_v1 - -- ZetaSQL requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING. - WHERE - TRUE QUALIFY -- With how our subscription platform currently works each Stripe subscription should -- only have one subscription item, and we enforce that so the ETL can rely on it. diff --git a/sql/moz-fx-data-shared-prod/subscription_platform_derived/stripe_subscriptions_history_v1/query.sql b/sql/moz-fx-data-shared-prod/subscription_platform_derived/stripe_subscriptions_history_v1/query.sql index b0bd116c80..1d7656cbe6 100644 --- a/sql/moz-fx-data-shared-prod/subscription_platform_derived/stripe_subscriptions_history_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/subscription_platform_derived/stripe_subscriptions_history_v1/query.sql @@ -119,9 +119,6 @@ subscription_items AS ( plan_id, FROM `moz-fx-data-shared-prod`.stripe_external.subscription_item_v1 - -- ZetaSQL requires QUALIFY to be used in conjunction with WHERE, GROUP BY, or HAVING. - WHERE - TRUE QUALIFY -- With how our subscription platform currently works each Stripe subscription should -- only have one subscription item, and we enforce that so the ETL can rely on it. diff --git a/sql/moz-fx-data-shared-prod/telemetry_derived/fog_decision_support_percentiles_v1/query.sql b/sql/moz-fx-data-shared-prod/telemetry_derived/fog_decision_support_percentiles_v1/query.sql index f48dde9635..ab14627c51 100644 --- a/sql/moz-fx-data-shared-prod/telemetry_derived/fog_decision_support_percentiles_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/telemetry_derived/fog_decision_support_percentiles_v1/query.sql @@ -92,8 +92,6 @@ metrics_windowed AS ( FROM metrics_unioned, UNNEST(metrics) - WHERE - TRUE QUALIFY ROW_NUMBER() OVER (PARTITION BY channel, metric_name) = 1 ), @@ -187,8 +185,6 @@ baseline_windowed AS ( FROM baseline_unioned, UNNEST(metrics) - WHERE - TRUE QUALIFY ROW_NUMBER() OVER (PARTITION BY channel, metric_name) = 1 ), @@ -285,8 +281,6 @@ windowed AS ( FROM unioned, UNNEST(metrics) - WHERE - TRUE QUALIFY ROW_NUMBER() OVER (PARTITION BY channel, metric_name) = 1 ) diff --git a/sql_generators/glean_usage/templates/baseline_clients_first_seen_v1.query.sql b/sql_generators/glean_usage/templates/baseline_clients_first_seen_v1.query.sql index 0f50421e8d..58973b2e8d 100644 --- a/sql_generators/glean_usage/templates/baseline_clients_first_seen_v1.query.sql +++ b/sql_generators/glean_usage/templates/baseline_clients_first_seen_v1.query.sql @@ -100,8 +100,6 @@ SELECT sample_id, client_id FROM _joined -WHERE - TRUE QUALIFY IF( COUNT(*) OVER (PARTITION BY client_id) > 1,