def _raw_table_name(table: sqlglot.exp.Table) -> str:
    """Return the plain name of ``table`` with SQL decorations stripped.

    The table expression is rendered with the "bigquery" dialect and without
    comments. From that text the alias (everything from the first " AS "
    onward), all backticks, and any PIVOT/UNPIVOT operator together with the
    text following it are removed.
    """
    rendered = table.sql("bigquery", comments=False)
    # remove alias: keep only what precedes the first " AS "
    unaliased, _, _ = rendered.partition(" AS ")
    # remove quotes
    unquoted = unaliased.replace("`", "")
    # remove PIVOT/UNPIVOT
    return re.sub(" (?:UN)?PIVOT.*$", "", unquoted)
from bigquery_etl.dependency import extract_table_references


class TestDependency:
    """Table-reference extraction in the presence of PIVOT/UNPIVOT operators."""

    def test_extract_table_refs_correctly_ignores_unpivot(self):
        """An UNPIVOT clause must not leak into the extracted table name."""
        sql = "SELECT * FROM a UNPIVOT(b FOR c IN (d, e, f))"

        assert extract_table_references(sql) == ["a"]

    def test_extract_table_refs_correctly_ignores_pivot(self):
        """A PIVOT clause must not leak into the extracted table name."""
        sql = """SELECT * FROM Produce
        PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))
        """

        assert extract_table_references(sql) == ["Produce"]

    def test_extract_table_refs_pivot_and_join(self):
        """Both the pivoted table and the joined table are reported."""
        sql = """SELECT * FROM Produce
        PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))
        JOIN Perishable_Mints USING (name)
        """
        found = extract_table_references(sql)

        # order of the returned references is not asserted here
        assert set(found) == {"Produce", "Perishable_Mints"}