GROWTH-101 - Update gclid_conversions view to 1-row per conversion (#4612)

* Update gclid_conversions view to 1-row per conversion

* Fully qualify table
This commit is contained in:
Frank Bertsch 2024-02-12 17:16:59 -05:00 коммит произвёл GitHub
Родитель b605cd9e26
Коммит 2d407f7e93
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
3 изменённых файлов: 57 добавлений и 3 удалений

Просмотреть файл

@ -20,13 +20,17 @@ stable_views = None
def _raw_table_name(table: sqlglot.exp.Table) -> str:
    """Return the bare, unquoted name of a parsed table reference.

    The table expression is rendered as BigQuery SQL without comments and
    then reduced to the name itself by dropping, in order:

    * any alias (everything from the first ``" AS "`` onwards),
    * backtick quoting,
    * a trailing ``PIVOT``/``UNPIVOT`` clause, which may be rendered as
      part of the table expression.

    Args:
        table: The table expression to name.

    Returns:
        The table name without alias, quotes, or pivot operator.
    """
    with_replacements = (
        table.sql("bigquery", comments=False)
        # remove alias
        .split(" AS ", 1)[0]
        # remove quotes
        .replace("`", "")
    )
    # remove PIVOT/UNPIVOT: cut from the operator keyword to the end
    return re.sub(r" (?:UN)?PIVOT.*$", "", with_replacements)
def extract_table_references(sql: str) -> List[str]:

Просмотреть файл

@ -2,6 +2,30 @@ CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.mozilla_org.gclid_conversions`
AS
SELECT
  -- Stamp every conversion with the last second of its activity date.
  FORMAT_DATETIME("%F %T", DATETIME(activity_date, TIME(23, 59, 59))) AS activity_date,
  gclid,
  -- Names as represented in Google Ads
  -- https://docs.google.com/spreadsheets/d/1YzhhvbpOlqPLORRJUZ55BIb0H20hwFqQFApR-r0UMfI
  CASE
    conversion_name
    WHEN "did_firefox_first_run"
      THEN "firefox_first_run"
    WHEN "did_search"
      THEN "firefox_first_search"
    WHEN "did_click_ad"
      THEN "firefox_first_ad_click"
    WHEN "did_returned_second_day"
      THEN "firefox_second_run"
    ELSE NULL
  END AS conversion_name,
FROM
  -- UNPIVOT turns the four did_* flag columns into one row per flag:
  -- the flag's column name lands in conversion_name and its value in
  -- did_conversion.
  `moz-fx-data-shared-prod`.mozilla_org_derived.gclid_conversions_v1 UNPIVOT(
    did_conversion FOR conversion_name IN (
      did_firefox_first_run,
      did_search,
      did_click_ad,
      did_returned_second_day
    )
  )
WHERE
  -- Keep only the flags that are set, i.e. one row per actual conversion.
  did_conversion

26
tests/test_dependency.py Normal file
Просмотреть файл

@ -0,0 +1,26 @@
from bigquery_etl.dependency import extract_table_references
class TestDependency:
    """Table-reference extraction in the presence of PIVOT/UNPIVOT."""

    def test_extract_table_refs_correctly_ignores_unpivot(self):
        """Only the unpivoted source table is reported for UNPIVOT."""
        sql = "SELECT * FROM a UNPIVOT(b FOR c IN (d, e, f))"
        assert extract_table_references(sql) == ["a"]

    def test_extract_table_refs_correctly_ignores_pivot(self):
        """Only the pivoted source table is reported for PIVOT."""
        sql = (
            "SELECT * FROM Produce\n"
            "PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))\n"
        )
        assert extract_table_references(sql) == ["Produce"]

    def test_extract_table_refs_pivot_and_join(self):
        """A table joined after a PIVOT clause is still reported."""
        sql = (
            "SELECT * FROM Produce\n"
            "PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))\n"
            "JOIN Perishable_Mints USING (name)\n"
        )
        expected = {"Produce", "Perishable_Mints"}
        # Order of the returned references is not part of the contract here.
        assert set(extract_table_references(sql)) == expected