Google Search Console revisions (DENG-1733) (#5424)
* Classify queries in Google Search Console data.
* Refactor repeated Google Search Console logic into UDFs.
* Replace `page_path_segment_1` field with `localized_site_code` field, and add related derived fields.
* Rename `country_code` field to `user_country_code`.
* Add `user_country` field.
* Include Google Search Console data for MDN.
This commit is contained in:
Parent
593f11a19c
Commit
079fbe2188
|
@ -3,15 +3,20 @@ description: |-
|
|||
Google Search impressions aggregated by page for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries, and Discover and Google News search impressions are included from August 2023 onward.
|
||||
For the developer.mozilla.org domain:
|
||||
* Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google.
|
||||
* Records before 2024-04-10 are from source data synced to BigQuery by Fivetran.
|
||||
|
||||
Source data from before August 2023 was synced to BigQuery by Fivetran.
|
||||
Source data from August 2023 onward was exported directly to BigQuery by Google.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c34.
|
||||
For the other domains:
|
||||
* Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google.
|
||||
* Records before 2023-08-01 are from source data synced to BigQuery by Fivetran.
|
||||
|
||||
Anonymized search queries, and Discover and Google News search impressions are only included if the source data was exported directly to BigQuery by Google.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
labels:
|
||||
|
|
|
@ -31,11 +31,41 @@ fields:
|
|||
description: |-
|
||||
The path part of the page URL.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: page_path_segment_1
|
||||
- name: localized_site_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
The first segment of the page URL path, which is often a locale like `en-US` or `de`.
|
||||
Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site description (if any), formatted as `<localized_site_language> - <localized_site_country>`, or just `<localized_site_language>` when the localized site code has no country part.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_language_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_language
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site language based on `localized_site_language_code` (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site country code in ISO-3166-1-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_country
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site country based on `localized_site_country_code` (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: query
|
||||
type: STRING
|
||||
|
@ -43,6 +73,16 @@ fields:
|
|||
description: |-
|
||||
The search query.
|
||||
This will be null for anonymized search impressions, and all Discover and Google News search impressions.
|
||||
- name: query_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Type of search query:
|
||||
* Anonymized: Query was redacted by Google to protect the users' privacy.
|
||||
* Brand: Query contained one or more Mozilla brand keywords.
|
||||
* Non-Brand: Query didn't contain any Mozilla brand keywords.
|
||||
* Unknown: Query couldn't be classified.
|
||||
This will be null for all Discover and Google News search impressions.
|
||||
- name: is_anonymized
|
||||
type: BOOLEAN
|
||||
mode: NULLABLE
|
||||
|
@ -55,7 +95,7 @@ fields:
|
|||
mode: NULLABLE
|
||||
description: |-
|
||||
Whether Google Search considers the page to be providing a good page experience.
|
||||
This will be null prior to August 2023.
|
||||
This will be null when the source data wasn't exported directly to BigQuery by Google.
|
||||
- name: search_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
@ -72,12 +112,18 @@ fields:
|
|||
mode: NULLABLE
|
||||
description: |-
|
||||
How the search result appeared (e.g. normal result, translated result, video).
|
||||
This will be null prior to August 2023.
|
||||
- name: country_code
|
||||
This will be null when the source data wasn't exported directly to BigQuery by Google.
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: user_country
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Country from which the user was searching.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
|
|
|
@ -1,48 +1,131 @@
|
|||
CREATE OR REPLACE VIEW
|
||||
`moz-fx-data-marketing-prod.google_search_console.search_impressions_by_page`
|
||||
AS
|
||||
WITH search_impressions_union AS (
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
page_url,
|
||||
page_domain_name,
|
||||
page_path,
|
||||
localized_site_code,
|
||||
localized_site_language_code,
|
||||
localized_site_country_code,
|
||||
query,
|
||||
FALSE AS is_anonymized,
|
||||
CAST(NULL AS BOOLEAN) AS has_good_page_experience,
|
||||
search_type,
|
||||
CAST(NULL AS STRING) AS search_appearance,
|
||||
user_country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_page_v1`
|
||||
WHERE
|
||||
CASE
|
||||
WHEN site_domain_name IN (
|
||||
'addons.mozilla.org',
|
||||
'blog.mozilla.org',
|
||||
'getpocket.com',
|
||||
'support.mozilla.org',
|
||||
'www.mozilla.org'
|
||||
)
|
||||
THEN `date` < '2023-08-01'
|
||||
WHEN site_domain_name = 'developer.mozilla.org'
|
||||
THEN `date` < '2024-04-10'
|
||||
ELSE FALSE
|
||||
END
|
||||
UNION ALL
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
page_url,
|
||||
page_domain_name,
|
||||
page_path,
|
||||
localized_site_code,
|
||||
localized_site_language_code,
|
||||
localized_site_country_code,
|
||||
query,
|
||||
is_anonymized,
|
||||
has_good_page_experience,
|
||||
search_type,
|
||||
search_appearance,
|
||||
user_country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_page_v2`
|
||||
WHERE
|
||||
CASE
|
||||
WHEN site_domain_name IN (
|
||||
'addons.mozilla.org',
|
||||
'blog.mozilla.org',
|
||||
'getpocket.com',
|
||||
'support.mozilla.org',
|
||||
'www.mozilla.org'
|
||||
)
|
||||
THEN `date` >= '2023-08-01'
|
||||
ELSE TRUE
|
||||
END
|
||||
)
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
page_url,
|
||||
page_domain_name,
|
||||
page_path,
|
||||
page_path_segment_1,
|
||||
query,
|
||||
FALSE AS is_anonymized,
|
||||
CAST(NULL AS BOOLEAN) AS has_good_page_experience,
|
||||
search_type,
|
||||
CAST(NULL AS STRING) AS search_appearance,
|
||||
country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_position
|
||||
search_impressions.date,
|
||||
search_impressions.site_url,
|
||||
search_impressions.site_domain_name,
|
||||
search_impressions.page_url,
|
||||
search_impressions.page_domain_name,
|
||||
search_impressions.page_path,
|
||||
search_impressions.localized_site_code,
|
||||
CONCAT(
|
||||
COALESCE(localized_site_language.name, search_impressions.localized_site_language_code),
|
||||
COALESCE(
|
||||
CONCAT(
|
||||
' - ',
|
||||
COALESCE(localized_site_country.name, search_impressions.localized_site_country_code)
|
||||
),
|
||||
''
|
||||
)
|
||||
) AS localized_site,
|
||||
search_impressions.localized_site_language_code,
|
||||
COALESCE(
|
||||
localized_site_language.name,
|
||||
search_impressions.localized_site_language_code
|
||||
) AS localized_site_language,
|
||||
search_impressions.localized_site_country_code,
|
||||
COALESCE(
|
||||
localized_site_country.name,
|
||||
search_impressions.localized_site_country_code
|
||||
) AS localized_site_country,
|
||||
search_impressions.query,
|
||||
mozfun.google_search_console.classify_site_query(
|
||||
search_impressions.site_domain_name,
|
||||
search_impressions.query,
|
||||
search_impressions.search_type
|
||||
) AS query_type,
|
||||
search_impressions.is_anonymized,
|
||||
search_impressions.has_good_page_experience,
|
||||
search_impressions.search_type,
|
||||
search_impressions.search_appearance,
|
||||
search_impressions.user_country_code,
|
||||
COALESCE(user_country.name, search_impressions.user_country_code) AS user_country,
|
||||
search_impressions.device_type,
|
||||
search_impressions.impressions,
|
||||
search_impressions.clicks,
|
||||
search_impressions.average_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_page_v1`
|
||||
WHERE
|
||||
`date` < '2023-08-01'
|
||||
UNION ALL
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
page_url,
|
||||
page_domain_name,
|
||||
page_path,
|
||||
page_path_segment_1,
|
||||
query,
|
||||
is_anonymized,
|
||||
has_good_page_experience,
|
||||
search_type,
|
||||
search_appearance,
|
||||
country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_page_v2`
|
||||
WHERE
|
||||
`date` >= '2023-08-01'
|
||||
search_impressions_union AS search_impressions
|
||||
LEFT JOIN
|
||||
`moz-fx-data-shared-prod.static.language_codes_v1` AS localized_site_language
|
||||
ON search_impressions.localized_site_language_code = localized_site_language.code_2
|
||||
LEFT JOIN
|
||||
`moz-fx-data-shared-prod.static.country_codes_v1` AS localized_site_country
|
||||
ON search_impressions.localized_site_country_code = localized_site_country.code
|
||||
LEFT JOIN
|
||||
`moz-fx-data-shared-prod.static.country_codes_v1` AS user_country
|
||||
ON search_impressions.user_country_code = user_country.code_3
|
||||
|
|
|
@ -3,15 +3,20 @@ description: |-
|
|||
Google Search impressions aggregated by site for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries are included from August 2023 onward.
|
||||
For the developer.mozilla.org domain:
|
||||
* Records from 2024-04-10 onward are from source data exported directly to BigQuery by Google.
|
||||
* Records before 2024-04-10 are from source data synced to BigQuery by Fivetran.
|
||||
|
||||
Source data from before August 2023 was synced to BigQuery by Fivetran.
|
||||
Source data from August 2023 onward was exported directly to BigQuery by Google.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c34.
|
||||
For the other domains:
|
||||
* Records from 2023-08-01 onward are from source data exported directly to BigQuery by Google.
|
||||
* Records before 2023-08-01 are from source data synced to BigQuery by Fivetran.
|
||||
|
||||
Anonymized search queries are only included if the source data was exported directly to BigQuery by Google.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
labels:
|
||||
|
|
|
@ -17,6 +17,15 @@ fields:
|
|||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: The search query.
|
||||
- name: query_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Type of search query:
|
||||
* Anonymized: Query was redacted by Google to protect the users' privacy.
|
||||
* Brand: Query contained one or more Mozilla brand keywords.
|
||||
* Non-Brand: Query didn't contain any Mozilla brand keywords.
|
||||
* Unknown: Query couldn't be classified.
|
||||
- name: is_anonymized
|
||||
type: BOOLEAN
|
||||
mode: NULLABLE
|
||||
|
@ -32,10 +41,14 @@ fields:
|
|||
* Image: In Google Search's "Images" tab.
|
||||
* Video: In Google Search's "Videos" tab.
|
||||
* News: In Google Search's "News" tab.
|
||||
- name: country_code
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
description: Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
- name: user_country
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Country from which the user was searching.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
|
|
@ -1,36 +1,83 @@
|
|||
CREATE OR REPLACE VIEW
|
||||
`moz-fx-data-marketing-prod.google_search_console.search_impressions_by_site`
|
||||
AS
|
||||
WITH search_impressions_union AS (
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
query,
|
||||
FALSE AS is_anonymized,
|
||||
search_type,
|
||||
user_country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_top_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_site_v1`
|
||||
WHERE
|
||||
CASE
|
||||
WHEN site_domain_name IN (
|
||||
'addons.mozilla.org',
|
||||
'blog.mozilla.org',
|
||||
'getpocket.com',
|
||||
'support.mozilla.org',
|
||||
'www.mozilla.org'
|
||||
)
|
||||
THEN `date` < '2023-08-01'
|
||||
WHEN site_domain_name = 'developer.mozilla.org'
|
||||
THEN `date` < '2024-04-10'
|
||||
ELSE FALSE
|
||||
END
|
||||
UNION ALL
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
query,
|
||||
is_anonymized,
|
||||
search_type,
|
||||
user_country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_top_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_site_v2`
|
||||
WHERE
|
||||
CASE
|
||||
WHEN site_domain_name IN (
|
||||
'addons.mozilla.org',
|
||||
'blog.mozilla.org',
|
||||
'getpocket.com',
|
||||
'support.mozilla.org',
|
||||
'www.mozilla.org'
|
||||
)
|
||||
THEN `date` >= '2023-08-01'
|
||||
ELSE TRUE
|
||||
END
|
||||
)
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
query,
|
||||
FALSE AS is_anonymized,
|
||||
search_type,
|
||||
country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_top_position
|
||||
search_impressions.`date`,
|
||||
search_impressions.site_url,
|
||||
search_impressions.site_domain_name,
|
||||
search_impressions.query,
|
||||
mozfun.google_search_console.classify_site_query(
|
||||
search_impressions.site_domain_name,
|
||||
search_impressions.query,
|
||||
search_impressions.search_type
|
||||
) AS query_type,
|
||||
search_impressions.is_anonymized,
|
||||
search_impressions.search_type,
|
||||
search_impressions.user_country_code,
|
||||
COALESCE(user_country.name, search_impressions.user_country_code) AS user_country,
|
||||
search_impressions.device_type,
|
||||
search_impressions.impressions,
|
||||
search_impressions.clicks,
|
||||
search_impressions.average_top_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_site_v1`
|
||||
WHERE
|
||||
`date` < '2023-08-01'
|
||||
UNION ALL
|
||||
SELECT
|
||||
`date`,
|
||||
site_url,
|
||||
site_domain_name,
|
||||
query,
|
||||
is_anonymized,
|
||||
search_type,
|
||||
country_code,
|
||||
device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
average_top_position
|
||||
FROM
|
||||
`moz-fx-data-marketing-prod.google_search_console_derived.search_impressions_by_site_v2`
|
||||
WHERE
|
||||
`date` >= '2023-08-01'
|
||||
search_impressions_union AS search_impressions
|
||||
LEFT JOIN
|
||||
`moz-fx-data-shared-prod.static.country_codes_v1` AS user_country
|
||||
ON search_impressions.user_country_code = user_country.code_3
|
||||
|
|
|
@ -3,23 +3,28 @@ description: |-
|
|||
Google Search impressions aggregated by page, synced by Fivetran to BigQuery for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries aren't included.
|
||||
|
||||
We stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
|
||||
For the other domains, we stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
owner1: srose
|
||||
# Not scheduled because we stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
# See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44.
|
||||
#scheduling:
|
||||
# date_partition_parameter: date
|
||||
scheduling:
|
||||
# Not scheduled because we stopped syncing Google Search Console data with Fivetran.
|
||||
# See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44 and https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
#dag_name: bqetl_google_search_console
|
||||
date_partition_parameter: date
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
{% set fivetran_gsc_dataset_ids = [
|
||||
'moz-fx-data-bq-fivetran.google_search_console_addons',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_blog',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_pocket',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_support',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_www',
|
||||
{% set fivetran_gsc_datasets = [
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_addons', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_blog', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_mdn', 'query_column': 'query'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_pocket', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_support', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_www', 'query_column': 'keyword'},
|
||||
] %}
|
||||
WITH keyword_page_report_union AS (
|
||||
{% for fivetran_gsc_dataset_id in fivetran_gsc_dataset_ids %}
|
||||
{% for fivetran_gsc_dataset in fivetran_gsc_datasets %}
|
||||
{% if not loop.first %}
|
||||
UNION ALL
|
||||
{% endif %}
|
||||
|
@ -14,7 +15,7 @@ WITH keyword_page_report_union AS (
|
|||
`date`,
|
||||
site,
|
||||
page,
|
||||
keyword,
|
||||
`{{ fivetran_gsc_dataset['query_column'] }}` AS query,
|
||||
search_type,
|
||||
country,
|
||||
device,
|
||||
|
@ -22,20 +23,22 @@ WITH keyword_page_report_union AS (
|
|||
clicks,
|
||||
position
|
||||
FROM
|
||||
`{{ fivetran_gsc_dataset_id }}.keyword_page_report`
|
||||
`{{ fivetran_gsc_dataset['id'] }}.keyword_page_report`
|
||||
{% endfor %}
|
||||
)
|
||||
SELECT
|
||||
`date`,
|
||||
site AS site_url,
|
||||
REGEXP_EXTRACT(site, r'^(?:https?://|sc-domain:)([^/]+)') AS site_domain_name,
|
||||
mozfun.google_search_console.extract_url_domain_name(site) AS site_domain_name,
|
||||
page AS page_url,
|
||||
REGEXP_EXTRACT(page, r'^https?://([^/]+)') AS page_domain_name,
|
||||
REGEXP_EXTRACT(page, r'^https?://(?:[^/]+)([^\?#]*)') AS page_path,
|
||||
REGEXP_EXTRACT(page, r'^https?://(?:[^/]+)/*([^/\?#]*)') AS page_path_segment_1,
|
||||
keyword AS query,
|
||||
mozfun.google_search_console.extract_url_domain_name(page) AS page_domain_name,
|
||||
mozfun.google_search_console.extract_url_path(page) AS page_path,
|
||||
mozfun.google_search_console.extract_url_locale(page) AS localized_site_code,
|
||||
mozfun.google_search_console.extract_url_language_code(page) AS localized_site_language_code,
|
||||
mozfun.google_search_console.extract_url_country_code(page) AS localized_site_country_code,
|
||||
query,
|
||||
INITCAP(search_type) AS search_type,
|
||||
UPPER(country) AS country_code,
|
||||
UPPER(country) AS user_country_code,
|
||||
INITCAP(device) AS device_type,
|
||||
CAST(impressions AS INTEGER) AS impressions,
|
||||
CAST(clicks AS INTEGER) AS clicks,
|
||||
|
|
|
@ -25,10 +25,18 @@ fields:
|
|||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: The path part of the page URL.
|
||||
- name: page_path_segment_1
|
||||
- name: localized_site_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: The first segment of the page URL path, which is often a locale like `en-US` or `de`.
|
||||
description: Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any).
|
||||
- name: localized_site_language_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
- name: localized_site_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Localized site country code in ISO-3166-1-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
- name: query
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
@ -42,10 +50,10 @@ fields:
|
|||
* Image: In Google Search's "Images" tab.
|
||||
* Video: In Google Search's "Videos" tab.
|
||||
* News: In Google Search's "News" tab.
|
||||
- name: country_code
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
description: Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
|
|
@ -3,13 +3,17 @@ description: |-
|
|||
Google Search impressions aggregated by page, exported directly to BigQuery for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries are included.
|
||||
|
||||
We started exporting Google Search Console data directly to BigQuery in July 2023.
|
||||
For the developer.mozilla.org domain, we started exporting Google Search Console data directly to BigQuery in April 2024.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
|
||||
For the other domains, we started exporting Google Search Console data directly to BigQuery in July 2023.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c34.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
|
@ -31,6 +35,9 @@ scheduling:
|
|||
- task_id: wait_for_google_search_console_getpocket_url_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_getpocket.searchdata_url_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
- task_id: wait_for_google_search_console_mdn_url_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_mdn.searchdata_url_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
- task_id: wait_for_google_search_console_support_url_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_support.searchdata_url_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
'moz-fx-data-marketing-prod.searchconsole_addons',
|
||||
'moz-fx-data-marketing-prod.searchconsole_blog',
|
||||
'moz-fx-data-marketing-prod.searchconsole_getpocket',
|
||||
'moz-fx-data-marketing-prod.searchconsole_mdn',
|
||||
'moz-fx-data-marketing-prod.searchconsole_support',
|
||||
'moz-fx-data-marketing-prod.searchconsole_www',
|
||||
] %}
|
||||
|
@ -61,11 +62,13 @@ WITH searchdata_url_impression_union AS (
|
|||
SELECT
|
||||
data_date AS `date`,
|
||||
site_url,
|
||||
REGEXP_EXTRACT(site_url, r'^(?:https?://|sc-domain:)([^/]+)') AS site_domain_name,
|
||||
mozfun.google_search_console.extract_url_domain_name(site_url) AS site_domain_name,
|
||||
url AS page_url,
|
||||
REGEXP_EXTRACT(url, r'^https?://([^/]+)') AS page_domain_name,
|
||||
REGEXP_EXTRACT(url, r'^https?://(?:[^/]+)([^\?#]*)') AS page_path,
|
||||
REGEXP_EXTRACT(url, r'^https?://(?:[^/]+)/*([^/\?#]*)') AS page_path_segment_1,
|
||||
mozfun.google_search_console.extract_url_domain_name(url) AS page_domain_name,
|
||||
mozfun.google_search_console.extract_url_path(url) AS page_path,
|
||||
mozfun.google_search_console.extract_url_locale(url) AS localized_site_code,
|
||||
mozfun.google_search_console.extract_url_language_code(url) AS localized_site_language_code,
|
||||
mozfun.google_search_console.extract_url_country_code(url) AS localized_site_country_code,
|
||||
query,
|
||||
(is_anonymized_query OR is_anonymized_discover) AS is_anonymized,
|
||||
is_page_experience AS has_good_page_experience,
|
||||
|
@ -77,7 +80,7 @@ SELECT
|
|||
{% endfor %}
|
||||
ELSE 'Normal result'
|
||||
END AS search_appearance,
|
||||
UPPER(country) AS country_code,
|
||||
UPPER(country) AS user_country_code,
|
||||
INITCAP(device) AS device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
|
|
|
@ -31,11 +31,23 @@ fields:
|
|||
description: |-
|
||||
The path part of the page URL.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: page_path_segment_1
|
||||
- name: localized_site_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
The first segment of the page URL path, which is often a locale like `en-US` or `de`.
|
||||
Localized site code such as `en-US` or `de` found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_language_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site language code in ISO-639-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: localized_site_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Localized site country code in ISO-3166-1-alpha-2 format found in the first segment of the page URL path (if any).
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: query
|
||||
type: STRING
|
||||
|
@ -69,11 +81,11 @@ fields:
|
|||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: How the search result appeared (e.g. normal result, translated result, video).
|
||||
- name: country_code
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: |-
|
||||
Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
This will be null for anonymized Discover impressions.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
|
|
|
@ -3,23 +3,28 @@ description: |-
|
|||
Google Search impressions aggregated by site, synced by Fivetran to BigQuery for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries aren't included.
|
||||
|
||||
We stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
For the developer.mozilla.org domain, we stopped syncing Google Search Console data with Fivetran in May 2024.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
|
||||
For the other domains, we stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
labels:
|
||||
incremental: true
|
||||
owner1: srose
|
||||
# Not scheduled because we stopped syncing Google Search Console data with Fivetran in August 2023.
|
||||
# See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44.
|
||||
#scheduling:
|
||||
# date_partition_parameter: date
|
||||
scheduling:
|
||||
# Not scheduled because we stopped syncing Google Search Console data with Fivetran.
|
||||
# See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c44 and https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
#dag_name: bqetl_google_search_console
|
||||
date_partition_parameter: date
|
||||
bigquery:
|
||||
time_partitioning:
|
||||
type: day
|
||||
|
|
|
@ -1,19 +1,20 @@
|
|||
{% set fivetran_gsc_dataset_ids = [
|
||||
'moz-fx-data-bq-fivetran.google_search_console_addons',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_blog',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_pocket',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_support',
|
||||
'moz-fx-data-bq-fivetran.google_search_console_www',
|
||||
{% set fivetran_gsc_datasets = [
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_addons', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_blog', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_mdn', 'query_column': 'query'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_pocket', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_support', 'query_column': 'keyword'},
|
||||
{'id': 'moz-fx-data-bq-fivetran.google_search_console_www', 'query_column': 'keyword'},
|
||||
] %}
|
||||
WITH keyword_site_report_by_site_union AS (
|
||||
{% for fivetran_gsc_dataset_id in fivetran_gsc_dataset_ids %}
|
||||
{% for fivetran_gsc_dataset in fivetran_gsc_datasets %}
|
||||
{% if not loop.first %}
|
||||
UNION ALL
|
||||
{% endif %}
|
||||
SELECT
|
||||
`date`,
|
||||
site,
|
||||
keyword,
|
||||
`{{ fivetran_gsc_dataset['query_column'] }}` AS query,
|
||||
search_type,
|
||||
country,
|
||||
device,
|
||||
|
@ -21,16 +22,16 @@ WITH keyword_site_report_by_site_union AS (
|
|||
clicks,
|
||||
position
|
||||
FROM
|
||||
`{{ fivetran_gsc_dataset_id }}.keyword_site_report_by_site`
|
||||
`{{ fivetran_gsc_dataset['id'] }}.keyword_site_report_by_site`
|
||||
{% endfor %}
|
||||
)
|
||||
SELECT
|
||||
`date`,
|
||||
site AS site_url,
|
||||
REGEXP_EXTRACT(site, r'^(?:https?://|sc-domain:)([^/]+)') AS site_domain_name,
|
||||
keyword AS query,
|
||||
mozfun.google_search_console.extract_url_domain_name(site) AS site_domain_name,
|
||||
query,
|
||||
INITCAP(search_type) AS search_type,
|
||||
UPPER(country) AS country_code,
|
||||
UPPER(country) AS user_country_code,
|
||||
INITCAP(device) AS device_type,
|
||||
CAST(impressions AS INTEGER) AS impressions,
|
||||
CAST(clicks AS INTEGER) AS clicks,
|
||||
|
|
|
@ -26,10 +26,10 @@ fields:
|
|||
* Image: In Google Search's "Images" tab.
|
||||
* Video: In Google Search's "Videos" tab.
|
||||
* News: In Google Search's "News" tab.
|
||||
- name: country_code
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
description: Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
|
|
@ -3,13 +3,17 @@ description: |-
|
|||
Google Search impressions aggregated by site, exported directly to BigQuery for the following domains:
|
||||
* addons.mozilla.org
|
||||
* blog.mozilla.org
|
||||
* developer.mozilla.org
|
||||
* getpocket.com
|
||||
* support.mozilla.org
|
||||
* www.mozilla.org
|
||||
|
||||
Anonymized search queries are included.
|
||||
|
||||
We started exporting Google Search Console data directly to BigQuery in July 2023.
|
||||
For the developer.mozilla.org domain, we started exporting Google Search Console data directly to BigQuery in April 2024.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1890816.
|
||||
|
||||
For the other domains, we started exporting Google Search Console data directly to BigQuery in July 2023.
|
||||
See https://bugzilla.mozilla.org/show_bug.cgi?id=1764960#c34.
|
||||
owners:
|
||||
- srose@mozilla.com
|
||||
|
@ -31,6 +35,9 @@ scheduling:
|
|||
- task_id: wait_for_google_search_console_getpocket_site_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_getpocket.searchdata_site_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
- task_id: wait_for_google_search_console_mdn_site_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_mdn.searchdata_site_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
- task_id: wait_for_google_search_console_support_site_impressions
|
||||
table_id: moz-fx-data-marketing-prod.searchconsole_support.searchdata_site_impression
|
||||
partition_id: '{{ data_interval_start.subtract(days=1) | ds_nodash }}'
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
'moz-fx-data-marketing-prod.searchconsole_addons',
|
||||
'moz-fx-data-marketing-prod.searchconsole_blog',
|
||||
'moz-fx-data-marketing-prod.searchconsole_getpocket',
|
||||
'moz-fx-data-marketing-prod.searchconsole_mdn',
|
||||
'moz-fx-data-marketing-prod.searchconsole_support',
|
||||
'moz-fx-data-marketing-prod.searchconsole_www',
|
||||
] %}
|
||||
|
@ -28,11 +29,11 @@ WITH searchdata_site_impression_union AS (
|
|||
SELECT
|
||||
data_date AS `date`,
|
||||
site_url,
|
||||
REGEXP_EXTRACT(site_url, r'^(?:https?://|sc-domain:)([^/]+)') AS site_domain_name,
|
||||
mozfun.google_search_console.extract_url_domain_name(site_url) AS site_domain_name,
|
||||
query,
|
||||
is_anonymized_query AS is_anonymized,
|
||||
INITCAP(REPLACE(search_type, '_', ' ')) AS search_type,
|
||||
UPPER(country) AS country_code,
|
||||
UPPER(country) AS user_country_code,
|
||||
INITCAP(device) AS device_type,
|
||||
impressions,
|
||||
clicks,
|
||||
|
|
|
@ -32,10 +32,10 @@ fields:
|
|||
* Image: In Google Search's "Images" tab.
|
||||
* Video: In Google Search's "Videos" tab.
|
||||
* News: In Google Search's "News" tab.
|
||||
- name: country_code
|
||||
- name: user_country_code
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
description: Country from which the search was made, in ISO-3166-1-Alpha-3 format.
|
||||
description: Country from which the user was searching, in ISO-3166-1-alpha-3 format.
|
||||
- name: device_type
|
||||
type: STRING
|
||||
mode: NULLABLE
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
# google_search_console
|
||||
|
||||
Functions for use with Google Search Console data.
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Classify Site Query
|
||||
description: >-
|
||||
Classify a Google search query for a site as "Anonymized", "Brand", "Non-Brand", or "Unknown".
|
|
@ -0,0 +1,137 @@
|
|||
/*
Classify a Google search query for a site.

Returns:
  * NULL         when `search_type` is 'Discover' or 'Google News'.
  * 'Anonymized' when `query` is NULL.
  * 'Brand'      when the site is www.mozilla.org and the query contains a brand term.
  * 'Non-Brand'  when the site is www.mozilla.org and the query contains no brand term.
  * 'Unknown'    for every other site (no brand term list is defined for them here).

`site_domain_name` and `search_type` are compared exactly (case-sensitively), while
`query` is lower-cased before matching so mixed-case queries are classified the same
way as their lower-case equivalents.
*/
CREATE OR REPLACE FUNCTION google_search_console.classify_site_query(
  site_domain_name STRING,
  query STRING,
  search_type STRING
)
RETURNS STRING AS (
  CASE
    -- Discover and Google News search impressions never have `query` values.
    WHEN search_type IN ('Discover', 'Google News')
      THEN NULL
    WHEN query IS NULL
      THEN 'Anonymized'
    WHEN site_domain_name = 'www.mozilla.org'
      THEN IF(
          REGEXP_CONTAINS(
            -- Every brand pattern below is lower-case, so normalize the query first.
            LOWER(query),
            -- The patterns are joined into a single alternation: `a|b|c|...`.
            -- NOTE(review): the English glosses are approximate; several entries appear to be
            -- phonetic spellings or keyboard-layout typos of "firefox"/"mozilla" rather than
            -- real words in the given language.
            ARRAY_TO_STRING(
              [
                r'\bff\b',
                r'\bm.z',
                r'f.ref.x',
                r'fier',
                r'fire',
                r'firf',
                r'focus',
                r'fokku',
                r'fox',
                r'il+a\b',
                r'nightly',
                r'quantum',
                r'μοζ+ιλ+α', -- moz+il+a (Greek)
                r'μοτζιλα', -- motzila (Greek)
                r'лиса', -- fox (Cyrillic)
                r'мази', -- mazi (Cyrillic)
                r'мазі', -- mazi (Cyrillic)
                r'моз', -- moz (Cyrillic)
                r'муз', -- muz (Cyrillic)
                r'фаер', -- fire (Cyrillic)
                r'фаир', -- fair (Cyrillic)
                r'файер', -- fire (Cyrillic)
                r'файр', -- fire (Cyrillic)
                r'фире', -- fire (Cyrillic)
                r'фокс', -- fox (Cyrillic)
                r'фох', -- fox (Cyrillic)
                r'כןרקכםס', -- "firefox" typed on a Hebrew keyboard layout
                r'מוזילה', -- mozilla (Hebrew)
                r'פיירפוקס', -- firefox (Hebrew)
                r'فاکس', -- fax (Arabic)
                r'فاير', -- fire (Arabic)
                r'فایر', -- fire (Arabic)
                r'فكس', -- fx (Arabic)
                r'فوكس', -- fox (Arabic)
                r'فير', -- fir (Arabic)
                r'موزلا', -- mozilla (Arabic)
                r'موزيلا', -- mozilla (Arabic)
                r'موزیلا', -- mozilla (Arabic)
                r'फायरफक्स', -- firefox (Indic)
                r'फायरफॉक्स', -- firefox (Indic)
                r'फ़ायरफ़ॉक्स', -- firefox (Indic)
                r'मोजिला', -- mozilla (Indic)
                r'मोज़िला', -- mozilla (Indic)
                r'ফায়ারফক্স', -- firefox (Indic)
                r'মজিলা', -- mozilla (Indic)
                r'মোজিলা', -- mozilla (Indic)
                r'ฟายฟอก', -- bleach (Thai)
                r'ฟายฟ๊อก', -- firefox (Thai)
                r'ไฟ ฟอก', -- fire bleach (Thai)
                r'ไฟฟ็อก', -- fire fox (Thai)
                r'ไฟฟ๊อก', -- fire fox (Thai)
                r'ไฟฟอก', -- purifying light (Thai)
                r'ไฟร์ฟอกซ์', -- firefox (Thai)
                r'ไฟล์ฟอก', -- bleaching file (Thai)
                r'ไฟลฟอก', -- fire bleach (Thai)
                r'모질라', -- mozilla (Korean)
                r'파이어', -- fire (Korean)
                r'폭스', -- fox (Korean)
                r'화이어', -- fire (Korean)
                r'ふぁいあ', -- faia (Japanese)
                r'ファイア', -- fire (Japanese)
                r'ファイや', -- faiya (Japanese)
                r'ファイヤ', -- fire (Japanese)
                r'ふぁいやー', -- fire (Japanese)
                r'ふぃれふぉ', -- firefox (Japanese)
                r'ふぉっくす', -- fox (Japanese)
                r'フォックス', -- fox (Japanese)
                r'モジラ', -- mozilla (Japanese)
                r'火孤', -- firefox (Chinese)
                r'火狐', -- firefox (Chinese)
                r'狐狸' -- fox (Chinese)
              ],
              '|'
            )
          ),
          'Brand',
          'Non-Brand'
        )
    ELSE 'Unknown'
  END
);

-- Tests
SELECT
  -- Discover / Google News impressions are never classified, with or without a query.
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'mozilla', 'Discover'),
    CAST(NULL AS STRING)
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'mozilla', 'Google News'),
    CAST(NULL AS STRING)
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', NULL, 'Discover'),
    CAST(NULL AS STRING)
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', NULL, 'Google News'),
    CAST(NULL AS STRING)
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', NULL, 'Web'),
    'Anonymized'
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'mozilla', 'Web'),
    'Brand'
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'firefox', 'Web'),
    'Brand'
  ),
  -- Brand matching must not depend on the letter case of the query.
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'FIREFOX', 'Web'),
    'Brand'
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'MOZILLA', 'Web'),
    'Brand'
  ),
  assert.equals(
    google_search_console.classify_site_query('www.mozilla.org', 'browser', 'Web'),
    'Non-Brand'
  ),
  assert.equals(
    google_search_console.classify_site_query('addons.mozilla.org', 'mozilla', 'Web'),
    'Unknown'
  ),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Country Code
|
||||
description: >-
|
||||
Extract the country code from a URL if it's present.
|
|
@ -0,0 +1,26 @@
|
|||
/*
Extract the country code from a URL's locale, upper-cased.

The locale comes from `google_search_console.extract_url_locale(url)`; the country code is
the part after the first hyphen. Returns NULL when the URL has no locale or the locale has
no country part (e.g. `es`).
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_country_code(url STRING)
RETURNS STRING AS (
  UPPER(
    -- Zero-based offset 1 is the second hyphen-separated part; SAFE_ yields NULL if absent.
    SPLIT(google_search_console.extract_url_locale(url), '-')[SAFE_OFFSET(1)]
  )
);

-- Tests
SELECT
  assert.equals(
    google_search_console.extract_url_country_code('https://www.mozilla.org/en-US/firefox/'),
    'US'
  ),
  -- A lower-case country part is normalized to upper case.
  assert.equals(
    google_search_console.extract_url_country_code('https://www.mozilla.org/en-us/firefox/'),
    'US'
  ),
  -- Language-only locale: no country part.
  assert.equals(
    google_search_console.extract_url_country_code('https://support.mozilla.org/es/'),
    CAST(NULL AS STRING)
  ),
  -- First path segment is not a locale.
  assert.equals(
    google_search_console.extract_url_country_code('https://blog.mozilla.org/ux/'),
    CAST(NULL AS STRING)
  ),
  -- No path segments at all.
  assert.equals(
    google_search_console.extract_url_country_code('https://www.mozilla.org/'),
    CAST(NULL AS STRING)
  ),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Domain Name
|
||||
description: >-
|
||||
Extract the domain name from a URL.
|
|
@ -0,0 +1,14 @@
|
|||
/*
Extract the domain name from a URL.

Accepts `http://` / `https://` URLs as well as `sc-domain:` values
(e.g. `sc-domain:addons.mozilla.org`). The scheme prefix is matched case-sensitively.
Returns NULL when `url` is NULL or starts with none of those prefixes.

The domain name ends at the first `/`, `?` or `#`, so a query string or fragment that
directly follows the host is not included. Any userinfo or port is returned as-is.
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_domain_name(url STRING)
RETURNS STRING AS (
  REGEXP_EXTRACT(url, r'^(?:https?://|sc-domain:)([^/?#]+)')
);

-- Tests
SELECT
  assert.equals(
    google_search_console.extract_url_domain_name('https://www.mozilla.org/'),
    'www.mozilla.org'
  ),
  assert.equals(
    google_search_console.extract_url_domain_name('sc-domain:addons.mozilla.org'),
    'addons.mozilla.org'
  ),
  -- A query string or fragment directly after the host is not part of the domain name.
  assert.equals(
    google_search_console.extract_url_domain_name('https://www.mozilla.org?foo'),
    'www.mozilla.org'
  ),
  assert.equals(
    google_search_console.extract_url_domain_name('https://www.mozilla.org#foo'),
    'www.mozilla.org'
  ),
  -- Unsupported prefix.
  assert.equals(
    google_search_console.extract_url_domain_name('ftp://www.mozilla.org/'),
    CAST(NULL AS STRING)
  ),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Language Code
|
||||
description: >-
|
||||
Extract the language code from a URL if it's present.
|
|
@ -0,0 +1,22 @@
|
|||
/*
Extract the language code from a URL's locale, lower-cased.

The locale comes from `google_search_console.extract_url_locale(url)`; the language code is
the part before the first hyphen. Returns NULL when the URL has no locale.
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_language_code(url STRING)
RETURNS STRING AS (
  LOWER(
    -- Zero-based offset 0 is the first hyphen-separated part; SAFE_ yields NULL if absent.
    SPLIT(google_search_console.extract_url_locale(url), '-')[SAFE_OFFSET(0)]
  )
);

-- Tests
SELECT
  assert.equals(
    google_search_console.extract_url_language_code('https://www.mozilla.org/en-US/firefox/'),
    'en'
  ),
  -- Language-only locale.
  assert.equals(
    google_search_console.extract_url_language_code('https://support.mozilla.org/es/'),
    'es'
  ),
  -- First path segment is not a locale.
  assert.equals(
    google_search_console.extract_url_language_code('https://blog.mozilla.org/ux/'),
    CAST(NULL AS STRING)
  ),
  -- No path segments at all.
  assert.equals(
    google_search_console.extract_url_language_code('https://www.mozilla.org/'),
    CAST(NULL AS STRING)
  ),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Locale
|
||||
description: >-
|
||||
Extract the locale from a URL if it's present.
|
|
@ -0,0 +1,216 @@
|
|||
/*
Extract the locale from a URL if it's present.

The first path segment of the URL is treated as a locale when it is either:
  * two letters, a hyphen, and two letters (e.g. `en-US`), or
  * one of the two-letter language codes listed below, compared case-insensitively.

The segment is returned with its original letter case. Returns NULL when the first path
segment is missing or is not recognized as a locale.
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_locale(url STRING)
RETURNS STRING AS (
  CASE
    -- `xx-YY` style locale, in any letter case.
    WHEN REGEXP_CONTAINS(
        google_search_console.extract_url_path_segment(url, 1),
        r'^[a-zA-Z]{2}-[a-zA-Z]{2}$'
      )
      THEN google_search_console.extract_url_path_segment(url, 1)
    -- Bare two-letter language code.
    WHEN LOWER(google_search_console.extract_url_path_segment(url, 1)) IN (
        'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
        'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs',
        'ca', 'ce', 'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy',
        'da', 'de', 'dv', 'dz',
        'ee', 'el', 'en', 'eo', 'es', 'et', 'eu',
        'fa', 'ff', 'fi', 'fj', 'fo', 'fr', 'fy',
        'ga', 'gd', 'gl', 'gn', 'gu', 'gv',
        'ha', 'he', 'hi', 'ho', 'hr', 'ht', 'hu', 'hy', 'hz',
        'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is', 'it', 'iu',
        'ja', 'jv',
        'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', 'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky',
        'la', 'lb', 'lg', 'li', 'ln', 'lo', 'lt', 'lu', 'lv',
        'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my',
        'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv', 'ny',
        'oc', 'oj', 'om', 'or', 'os',
        'pa', 'pi', 'pl', 'ps', 'pt',
        'qu',
        'rm', 'rn', 'ro', 'ru', 'rw',
        'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st',
        'su', 'sv', 'sw',
        'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw', 'ty',
        'ug', 'uk', 'ur', 'uz',
        've', 'vi', 'vo',
        'wa', 'wo',
        'xh',
        'yi', 'yo',
        'za', 'zh', 'zu'
      )
      THEN google_search_console.extract_url_path_segment(url, 1)
    ELSE NULL
  END
);

-- Tests
SELECT
  assert.equals(
    google_search_console.extract_url_locale('https://www.mozilla.org/en-US/firefox/'),
    'en-US'
  ),
  -- The locale keeps the letter case found in the URL.
  assert.equals(
    google_search_console.extract_url_locale('https://www.mozilla.org/en-us/firefox/'),
    'en-us'
  ),
  -- Language-only locale.
  assert.equals(google_search_console.extract_url_locale('https://support.mozilla.org/es/'), 'es'),
  -- Two-letter first segment that is not a listed language code.
  assert.equals(
    google_search_console.extract_url_locale('https://blog.mozilla.org/ux/'),
    CAST(NULL AS STRING)
  ),
  -- No path segments at all.
  assert.equals(
    google_search_console.extract_url_locale('https://www.mozilla.org/'),
    CAST(NULL AS STRING)
  ),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Path
|
||||
description: >-
|
||||
Extract the path from a URL.
|
|
@ -0,0 +1,20 @@
|
|||
/*
Extract the path from a URL.

Only `http://` and `https://` URLs are supported (matched case-sensitively); anything else,
including NULL, returns NULL. The path is everything after the host up to, but excluding,
the first `?` or `#`. It is an empty string when the URL has no path.

The host ends at the first `/`, `?` or `#`, so slashes inside a query string or fragment
that directly follows the host are not mistaken for the start of the path.
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_path(url STRING)
RETURNS STRING AS (
  REGEXP_EXTRACT(url, r'^https?://[^/?#]+([^?#]*)')
);

-- Tests
SELECT
  assert.equals(google_search_console.extract_url_path('https://www.mozilla.org'), ''),
  assert.equals(google_search_console.extract_url_path('https://www.mozilla.org/'), '/'),
  assert.equals(
    google_search_console.extract_url_path('https://www.mozilla.org/en-US/firefox/'),
    '/en-US/firefox/'
  ),
  -- Query strings and fragments are not part of the path.
  assert.equals(
    google_search_console.extract_url_path('https://www.mozilla.org/en-US/firefox/?foo'),
    '/en-US/firefox/'
  ),
  assert.equals(
    google_search_console.extract_url_path('https://www.mozilla.org/en-US/firefox/#foo'),
    '/en-US/firefox/'
  ),
  -- A slash inside a query string or fragment that directly follows the host is not a path.
  assert.equals(
    google_search_console.extract_url_path('https://www.mozilla.org?next=/en-US/'),
    ''
  ),
  assert.equals(google_search_console.extract_url_path('https://www.mozilla.org#/foo'), ''),
|
|
@ -0,0 +1,3 @@
|
|||
friendly_name: Extract URL Path Segment
|
||||
description: >-
|
||||
Extract a particular path segment from a URL.
|
|
@ -0,0 +1,21 @@
|
|||
/*
Extract a particular path segment from a URL.

The path from `google_search_console.extract_url_path(url)` is split on `/` and
`segment_number` is used as a zero-based offset into the parts. Empty parts and offsets
past the end are both returned as NULL.
*/
CREATE OR REPLACE FUNCTION google_search_console.extract_url_path_segment(
  url STRING,
  segment_number INTEGER
)
RETURNS STRING AS (
  NULLIF(
    -- SAFE_OFFSET yields NULL rather than an error when the offset is out of range.
    SPLIT(
      google_search_console.extract_url_path(url),
      '/'
    )[SAFE_OFFSET(segment_number)],
    -- Treat empty parts (e.g. the one after a trailing `/`) as missing.
    ''
  )
);

-- Tests
SELECT
  assert.equals(
    google_search_console.extract_url_path_segment('https://www.mozilla.org/en-US/firefox/', 1),
    'en-US'
  ),
  assert.equals(
    google_search_console.extract_url_path_segment('https://www.mozilla.org/en-US/firefox/', 2),
    'firefox'
  ),
  -- The empty part after the trailing slash is reported as NULL.
  assert.equals(
    google_search_console.extract_url_path_segment('https://www.mozilla.org/en-US/firefox/', 3),
    CAST(NULL AS STRING)
  ),
|
Загрузка…
Ссылка в новой задаче