Fix #1457 - Generate and run Fenix ETL for GLAM in glam-fenix-dev (#1458)

* Resolve generated sql to glam-fenix-dev and change output in sql/ dir

* Add new script for testing glam-fenix queries

* Add generated sql for version control

* Use variables correctly in bash

* Remove latest versions from UDF

* Update test to generate minimum set of tables for nightly

* Commit generated queries for testing

* Cast only if not glob

* Ignore dryrun and publish view for glam-fenix-dev

* Fix linting error

* Update comments

* Use DST_PROJECT consistently in scripts

* Update comments

* Update script/glam/test/test_glean_org_mozilla_fenix_glam_nightly

Co-authored-by: Ben Wu <benjaminwu124@gmail.com>

* Update script/glam/generate_and_run_desktop_sql

Co-authored-by: Ben Wu <benjaminwu124@gmail.com>

Co-authored-by: Ben Wu <benjaminwu124@gmail.com>
This commit is contained in:
Anthony Miyaguchi 2020-10-22 11:40:52 -07:00 коммит произвёл GitHub
Родитель 8508fa35a5
Коммит b7695049c6
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
72 изменённых файлов: 3508 добавлений и 306 удалений

Просмотреть файл

@ -127,8 +127,8 @@ SKIP = {
"sql/moz-fx-data-shared-prod/telemetry_derived/clients_histogram_bucket_counts_v1/query.sql", # noqa E501
"sql/moz-fx-data-shared-prod/telemetry_derived/glam_client_probe_counts_extract_v1/query.sql", # noqa E501
"sql/moz-fx-data-shared-prod/telemetry_derived/asn_aggregates_v1/query.sql",
# Dataset sql/moz-fx-data-shared-prod:glam_etl was not found
*glob.glob("sql/moz-fx-data-shared-prod/glam_etl/**/*.sql", recursive=True),
# Dataset sql/glam-fenix-dev:glam_etl was not found
*glob.glob("sql/glam-fenix-dev/glam_etl/**/*.sql", recursive=True),
# Query templates
"sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v1/fenix_metrics.template.sql", # noqa E501
"sql/moz-fx-data-shared-prod/search_derived/mobile_search_clients_daily_v1/mobile_search_clients_daily.template.sql", # noqa

Просмотреть файл

@ -70,14 +70,15 @@ def main():
"""Generate GLAM ETL queries."""
parser = ArgumentParser(description=main.__doc__)
parser.add_argument("--prefix")
parser.add_argument("--project", default="glam-fenix-dev")
parser.add_argument("--dataset", default="glam_etl")
parser.add_argument("--sql-root", default="sql/moz-fx-data-shared-prod/")
parser.add_argument("--sql-root", default="sql/")
parser.add_argument("--daily-view-only", action="store_true", default=False)
args = parser.parse_args()
env = Environment(loader=PackageLoader("bigquery_etl", "glam/templates"))
dataset_path = Path(args.sql_root) / args.dataset
dataset_path = Path(args.sql_root) / args.project / args.dataset
if not dataset_path.is_dir():
raise NotADirectoryError(f"path to {dataset_path} not found")

Просмотреть файл

@ -56,7 +56,7 @@ aggregated AS (
{{ attributes }},
metric,
metric_type,
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) as value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) as value
FROM
flattened_histograms
GROUP BY

Просмотреть файл

@ -1,6 +1,6 @@
{{ header }}
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.glam_etl.{{ prefix }}__clients_histogram_aggregates_v1`(
`{{ project }}.glam_etl.{{ prefix }}__clients_histogram_aggregates_v1`(
sample_id INT64,
client_id STRING,
ping_type STRING,

Просмотреть файл

@ -69,7 +69,7 @@ aggregated_daily AS (
SELECT
{{ attributes }},
{{ metric_attributes }},
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
filtered_daily
GROUP BY

Просмотреть файл

@ -19,7 +19,7 @@ RETURNS ARRAY<
aggregated_data AS (
SELECT AS STRUCT
{{ metric_attributes }},
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
unnested
GROUP BY

Просмотреть файл

@ -1,6 +1,6 @@
{{ header }}
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.{{ destination_table }}`(
`{{ project }}.{{ destination_table }}`(
{{ attributes_type }},
scalar_aggregates {{ user_data_type }}
)

Просмотреть файл

@ -18,7 +18,7 @@ SELECT
ping_type,
os,
app_build_id as build_id,
SAFE_CAST({{ build_date_udf }}(app_build_id) AS STRING) as build_date,
IF(app_build_id="*", "*", SAFE_CAST({{ build_date_udf }}(app_build_id) AS STRING)) as build_date,
metric,
metric_type,
-- BigQuery has some null unicode characters which Postgresql doesn't like,

Просмотреть файл

@ -15,7 +15,7 @@ SELECT
app_version,
coalesce(ping_type, "*") as ping_type,
COALESCE(app_build_id, "*") as app_build_id,
SAFE_CAST({{ build_date_udf }}(app_build_id) AS STRING) AS build_date,
IF(app_build_id="*", "*", SAFE_CAST({{ build_date_udf }}(app_build_id) AS STRING))AS build_date,
COALESCE(os, "*") AS os,
total_users
FROM deduped

Просмотреть файл

@ -46,7 +46,6 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
CREATE TEMP FUNCTION udf_normalize_histograms(
arrs ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
@ -57,7 +56,6 @@ CREATE TEMP FUNCTION udf_normalize_histograms(
)
RETURNS ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,

Просмотреть файл

@ -1,18 +1,18 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_beta__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_beta__view_clients_daily_histogram_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1
WHERE
mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'beta'
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox_beta__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_firefox_beta__view_clients_daily_histogram_aggregates_v1
)
SELECT
* EXCEPT (app_build_id, channel),

Просмотреть файл

@ -1,18 +1,18 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_beta__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_beta__view_clients_daily_scalar_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1
WHERE
mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'beta'
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox_beta__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_firefox_beta__view_clients_daily_scalar_aggregates_v1
)
SELECT
* EXCEPT (app_build_id, channel),

Просмотреть файл

@ -1,23 +1,23 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_histogram_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1
WHERE
mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'nightly'
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_histogram_aggregates_v1
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_histogram_aggregates_v1
)
SELECT
-- NOTE: app_version is dropped due to a lack of semantic versioning. We opt

Просмотреть файл

@ -1,23 +1,23 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_scalar_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1
WHERE
mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'nightly'
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_scalar_aggregates_v1
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_scalar_aggregates_v1
)
SELECT
-- NOTE: app_version is dropped due to a lack of semantic versioning. We opt

Просмотреть файл

@ -1,11 +1,11 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_histogram_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox__view_clients_daily_histogram_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_firefox__view_clients_daily_histogram_aggregates_v1
)
SELECT
* EXCEPT (app_build_id, channel),

Просмотреть файл

@ -1,11 +1,11 @@
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_scalar_aggregates_v1
AS
WITH extracted AS (
SELECT
*
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox__view_clients_daily_scalar_aggregates_v1
`{{ project }}`.glam_etl.org_mozilla_firefox__view_clients_daily_scalar_aggregates_v1
)
SELECT
* EXCEPT (app_build_id, channel),

Просмотреть файл

@ -1,10 +1,10 @@
{{ header }}
-- View for histogram aggregates that handles time-partitioning
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__view_clients_daily_histogram_aggregates_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__view_clients_daily_histogram_aggregates_v1`
AS
SELECT
* EXCEPT (submission_date),
DATE(_PARTITIONTIME) AS submission_date
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__clients_daily_histogram_aggregates*`
`{{ project }}.{{ dataset }}.{{ prefix }}__clients_daily_histogram_aggregates*`

Просмотреть файл

@ -1,10 +1,10 @@
{{ header }}
-- View to union daily scalar aggregates with date partitioning
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__view_clients_daily_scalar_aggregates_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__view_clients_daily_scalar_aggregates_v1`
AS
SELECT
* EXCEPT (submission_date),
DATE(_PARTITIONTIME) AS submission_date
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__clients_daily_scalar_aggregates*`
`{{ project }}.{{ dataset }}.{{ prefix }}__clients_daily_scalar_aggregates*`

Просмотреть файл

@ -1,27 +1,27 @@
{{ header }}
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__view_probe_counts_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__view_probe_counts_v1`
AS
WITH all_counts AS (
SELECT
*
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__scalar_probe_counts_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__scalar_probe_counts_v1`
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__histogram_probe_counts_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__histogram_probe_counts_v1`
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__scalar_percentiles_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__scalar_percentiles_v1`
UNION ALL
SELECT
*
FROM
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__histogram_percentiles_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__histogram_percentiles_v1`
)
SELECT
*

Просмотреть файл

@ -3,20 +3,20 @@
{% from 'macros.sql' import enumerate_table_combinations %}
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.{{ dataset }}.{{ prefix }}__view_user_counts_v1`
`{{ project }}.{{ dataset }}.{{ prefix }}__view_user_counts_v1`
AS
WITH all_clients AS (
SELECT
client_id,
{{ attributes }}
FROM `moz-fx-data-shared-prod`.{{ dataset }}.{{ prefix }}__clients_scalar_aggregates_v1
FROM `{{ project }}`.{{ dataset }}.{{ prefix }}__clients_scalar_aggregates_v1
UNION ALL
SELECT
client_id,
{{ attributes }}
FROM `moz-fx-data-shared-prod`.{{ dataset }}.{{ prefix }}__clients_histogram_aggregates_v1
FROM `{{ project }}`.{{ dataset }}.{{ prefix }}__clients_histogram_aggregates_v1
),
{{
enumerate_table_combinations(

Просмотреть файл

@ -20,11 +20,9 @@ VIEWS_TO_SKIP = (
"firefox_accounts/fxa_amplitude_email_clicks/view.sql",
"pocket/pocket_reach_mau/view.sql",
"telemetry/buildhub2/view.sql",
# Dataset moz-fx-data-shared-prod:glam_etl was not found
*[
str(path)
for path in Path("sql/moz-fx-data-shared-prod").glob("glam_etl/**/view.sql")
],
# Dataset glam-fenix-dev:glam_etl was not found
# TODO: this should be removed if views are to be automatically deployed
*[str(path) for path in Path("sql/glam-fenix-dev").glob("glam_etl/**/view.sql")],
# View in project other than prod
"shredder_state/progress/view.sql",
)

Просмотреть файл

@ -1,6 +1,8 @@
#!/bin/bash
# import run_query procedure from run_glam_sql without running the script
PROJECT=${PROJECT:-moz-fx-data-shared-prod}
DST_PROJECT=${DST_PROJECT:-$PROJECT}
IMPORT=true source script/glam/run_glam_sql

Просмотреть файл

@ -6,7 +6,8 @@ function write_scalars {
local product=$1
local dataset=$2
local table=$3
local directory="sql/moz-fx-data-shared-prod/glam_etl/${product}__clients_daily_scalar_aggregates_${table}"
local dst_project=$4
local directory="sql/${dst_project}/glam_etl/${product}__clients_daily_scalar_aggregates_${table}"
mkdir -p "$directory"
if ! python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates \
--source-table "$dataset.$table" \
@ -22,7 +23,8 @@ function write_histograms {
local product=$1
local dataset=$2
local table=$3
local directory="sql/moz-fx-data-shared-prod/glam_etl/${product}__clients_daily_histogram_aggregates_${table}"
local dst_project=$4
local directory="sql/${dst_project}/glam_etl/${product}__clients_daily_histogram_aggregates_${table}"
mkdir -p "$directory"
if ! python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates \
--source-table "$dataset.$table" \
@ -36,10 +38,11 @@ function write_histograms {
function write_clients_daily_aggregates {
local product=$1
local project=$2
local src_project=$2
local dst_project=$3
local dataset="${product}_stable"
local qualified="$project:$dataset"
local qualified="$src_project:$dataset"
# validate inputs with set -e, however note that this will fail silently
if ! bq ls "$qualified" &> /dev/null; then
echo "could not list $qualified"
@ -53,14 +56,14 @@ function write_clients_daily_aggregates {
# e.g. baseline_v1
local tables;
tables=$(
bq ls "$project:$dataset" \
bq ls "$qualified" \
| grep TABLE \
| awk '{print $1}'
)
# generate all of the schemas in parallel
for table in $tables; do
write_scalars "$product" "$dataset" "$table" &
write_histograms "$product" "$dataset" "$table" &
write_scalars "$product" "$dataset" "$table" "$dst_project" &
write_histograms "$product" "$dataset" "$table" "$dst_project" &
done
# wait for all of the processes before continuing
@ -69,17 +72,21 @@ function write_clients_daily_aggregates {
cd "$(dirname "$0")/../.."
error="STAGE must be one of (daily, incremental, all)"
project=${SRC_PROJECT:-moz-fx-data-shared-prod}
# The project for generating the clients daily tables
src_project=${SRC_PROJECT:-moz-fx-data-shared-prod}
# We may also define the PROJECT as the destination project for backwards
# compatibility.
dst_project=${DST_PROJECT:-${PROJECT:-glam-fenix-dev}}
product=${PRODUCT?PRODUCT must be defined}
stage=${STAGE?$error}
if [[ $stage == "daily" ]]; then
write_clients_daily_aggregates "$product" "$project"
write_clients_daily_aggregates "$product" "$src_project" "$dst_project"
python3 -m bigquery_etl.glam.generate --prefix "${product}" --daily-view-only
elif [[ $stage == "incremental" ]]; then
python3 -m bigquery_etl.glam.generate --prefix "${product}"
elif [[ $stage == "all" ]]; then
write_clients_daily_aggregates "$product" "$project"
write_clients_daily_aggregates "$product" "$src_project" "$dst_project"
python3 -m bigquery_etl.glam.generate --prefix "${product}"
else
echo "$error"

Просмотреть файл

@ -13,7 +13,12 @@ print(dt.strftime("%Y-%m-%d"))
EOD
}
# Project of the generated SQL
PROJECT=${PROJECT:-"glam-fenix-dev"}
# The DST project is the location of the destination tables, which may be
# different from PROJECT. One example would be for running desktop SQL into a
# dataset that's not moz-fx-data-shared-prod.
DST_PROJECT=${DST_PROJECT:-$PROJECT}
PROD_DATASET=${PROD_DATASET:-"glam_etl"}
DATASET=${DATASET:-"glam_etl_dev"}
SUBMISSION_DATE=${SUBMISSION_DATE:-$(yesterday)}
@ -28,16 +33,6 @@ function replace_dataset {
sed "s/$PROD_DATASET/$DATASET/g" < "$sql_path"
}
# replace project and dataset in an init or view file
#
# Arguments:
#   $1  path to the SQL file to rewrite
# Reads the globals PROJECT, PROD_DATASET and DATASET.
# Writes the rewritten SQL to stdout; the file on disk is not modified.
function replace_project_dataset {
local sql_path=$1
# project name that is substituted with $PROJECT
local prod="moz-fx-data-shared-prod"
# NOTE: the variables are interpolated into the sed expressions unescaped, so
# they are treated as regular expressions and every occurrence on a line is
# replaced, not only fully-qualified table names.
sed "s/$prod/$PROJECT/g" < "$sql_path" | \
sed "s/$PROD_DATASET/$DATASET/g"
}
function run_query {
local destination_table=$1
local time_partition=${2:-false}
@ -46,7 +41,7 @@ function run_query {
local query_location=${5:-$destination_table}
local additional_arguments="${6:---replace}"
local sample_size=${7:-10}
local query="sql/moz-fx-data-shared-prod/$PROD_DATASET/$query_location/query.sql"
local query="sql/$PROJECT/$PROD_DATASET/$query_location/query.sql"
# add an option to write to a time-partitioned table
if $time_partition; then
@ -60,7 +55,7 @@ function run_query {
--max_rows=0 \
--use_legacy_sql=false \
$additional_arguments \
--project_id="$PROJECT" \
--project_id="$DST_PROJECT" \
--dataset_id="$DATASET" \
--destination_table="$destination_table" \
--parameter="submission_date:DATE:$SUBMISSION_DATE" \
@ -74,16 +69,16 @@ function run_query {
function run_init {
local destination_table=$1
local init="sql/moz-fx-data-shared-prod/$PROD_DATASET/$destination_table/init.sql"
local init="sql/$PROJECT/$PROD_DATASET/$destination_table/init.sql"
# run if needed
if ! bq show "${DATASET}.${destination_table}" &> /dev/null; then
echo "running $init"
local tmp
tmp=$(mktemp)
replace_project_dataset "$init" > "$tmp"
replace_dataset "$init" > "$tmp"
bq query \
--use_legacy_sql=false \
--project_id="$PROJECT" \
--project_id="$DST_PROJECT" \
< "$tmp"
fi
}
@ -91,14 +86,14 @@ function run_init {
function run_view {
local view_name=$1
local view="sql/moz-fx-data-shared-prod/$PROD_DATASET/$view_name/view.sql"
local view="sql/$PROJECT/$PROD_DATASET/$view_name/view.sql"
echo "running $view"
local tmp
tmp=$(mktemp)
replace_project_dataset "$view" > "$tmp"
replace_dataset "$view" > "$tmp"
bq query \
--use_legacy_sql=false \
--project_id="$PROJECT" \
--project_id="$DST_PROJECT" \
--dataset_id="$DATASET" \
< "$tmp"
}
@ -202,16 +197,17 @@ function run_glean_sql {
fi
if [[ $stage == "daily" || $stage == "all" ]]; then
if ((start_stage <= 0)) && [[ $export_only = false ]]; then
for directory in sql/moz-fx-data-shared-prod/glam_etl/"${product}"__clients_daily_scalar_aggregates*/; do
for directory in sql/$PROJECT/glam_etl/"${product}"__clients_daily_scalar_aggregates*/; do
run_query "$(basename "$directory")" true &
done
for directory in sql/moz-fx-data-shared-prod/glam_etl/"${product}"__clients_daily_histogram_aggregates*/; do
for directory in sql/$PROJECT/glam_etl/"${product}"__clients_daily_histogram_aggregates*/; do
run_query "$(basename "$directory")" true &
done
wait
# run in daily and incremental
run_view "${product}__view_clients_daily_scalar_aggregates_v1" &
run_view "${product}__view_clients_daily_histogram_aggregates_v1" &
wait
fi
fi
if [[ $stage == "incremental" || $stage == "all" ]]; then

Просмотреть файл

@ -0,0 +1,46 @@
#!/bin/bash
# Generate the Fenix nightly GLAM SQL that is checked into the repository and
# run it to test the workflow.
#
# Environment:
#   PROJECT        name of the directory under sql/ that holds the generated
#                  queries (default: glam-fenix-dev)
#   SKIP_GENERATE  when set to anything other than "false", skip generation and
#                  only run the queries (default: false)

set -e

project=${PROJECT:-glam-fenix-dev}
skip_generate=${SKIP_GENERATE:-false}

# NOTE: there are three app_ids that we must look at for historical context.
# For the purpose of this script, it is sufficient to look only at what is
# currently "fenix nightly". We must have at least one scalar table and one
# histogram table for each of the tables referenced in the view. We keep all
# pings for org_mozilla_fenix, and only the metrics ping for the others.
app_ids=(
    "org_mozilla_fenix"
    "org_mozilla_fenix_nightly"
    "org_mozilla_fennec_aurora"
)
logical_app_id="org_mozilla_fenix_glam_nightly"

dir="$(dirname "$0")/.."
sql_dir="$dir/../../sql/$project/glam_etl"

if [[ $skip_generate == false ]]; then
    # Generate the daily queries for every app id in parallel.
    pids=()
    for app_id in "${app_ids[@]}"; do
        PRODUCT=$app_id STAGE=daily "$dir/generate_glean_sql" &
        pids+=("$!")
    done
    # A bare `wait` always returns 0, which would hide a failed generator from
    # `set -e`. Waiting on each PID surfaces its exit status instead.
    for pid in "${pids[@]}"; do
        wait "$pid"
    done

    # remove tables to reduce noise of checked-in queries
    for app_id in "${app_ids[@]}"; do
        if [[ $app_id == "org_mozilla_fenix" ]]; then
            continue
        fi
        for path in "${sql_dir}/${app_id}__clients"*; do
            # An unmatched glob is left as the literal pattern; skip it rather
            # than asking rm to delete a path that does not exist.
            if [[ ! -e $path ]]; then
                continue
            fi
            # keep the queries derived from the metrics ping
            if [[ $path == "${sql_dir}/${app_id}__clients"*metrics* ]]; then
                continue
            fi
            rm -r -- "$path"
        done
    done

    PRODUCT=$logical_app_id STAGE=incremental "$dir/generate_glean_sql"
fi

# Run the daily stage for each app id, then the incremental stage for the
# logical app id.
for app_id in "${app_ids[@]}"; do
    PRODUCT=$app_id STAGE=daily "$dir/run_glam_sql"
done
PRODUCT=$logical_app_id STAGE=incremental "$dir/run_glam_sql"

Просмотреть файл

@ -1,18 +0,0 @@
#!/bin/bash
# generate sql for checking into the repository and for testing the workflow
#
# Environment:
#   SKIP_GENERATE  when set to anything other than "false", skip generation and
#                  only run the queries (default: false)
set -e
skip_generate=${SKIP_GENERATE:-false}
# app id passed as PRODUCT for the daily stage
app_id="org_mozilla_firefox"
# logical app id passed as PRODUCT for the incremental stage
logical_app_id="org_mozilla_fenix_glam_release"
# parent of the directory containing this script, where the generate_glean_sql
# and run_glam_sql scripts are looked up
dir="$(dirname "$0")/.."
if [[ $skip_generate == false ]]; then
PRODUCT=$app_id STAGE=daily $dir/generate_glean_sql
PRODUCT=$logical_app_id STAGE=incremental $dir/generate_glean_sql
fi
# run the daily stage first, then the incremental stage
PRODUCT=$app_id STAGE=daily $dir/run_glam_sql
PRODUCT=$logical_app_id STAGE=incremental $dir/run_glam_sql

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates --source-table org_mozilla_firefox_stable.metrics_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates --source-table org_mozilla_fenix_stable.metrics_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.metrics_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.metrics_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL
@ -159,6 +159,11 @@ histograms AS (
"timing_distribution",
metrics.timing_distribution.gfx_webrender_sceneswap_time.values
),
(
"glean_database_size",
"memory_distribution",
metrics.memory_distribution.glean_database_size.values
),
(
"glean_upload_discarded_exceeding_pings_size",
"memory_distribution",
@ -209,6 +214,31 @@ histograms AS (
"timing_distribution",
metrics.timing_distribution.javascript_gc_total_time.values
),
(
"js_baseline_compile_percentage",
"custom_distribution",
metrics.custom_distribution.js_baseline_compile_percentage.values
),
(
"js_bytecode_caching_time",
"timing_distribution",
metrics.timing_distribution.js_bytecode_caching_time.values
),
(
"js_delazification_percentage",
"custom_distribution",
metrics.custom_distribution.js_delazification_percentage.values
),
(
"js_execution_percentage",
"custom_distribution",
metrics.custom_distribution.js_execution_percentage.values
),
(
"js_xdr_encode_percentage",
"custom_distribution",
metrics.custom_distribution.js_xdr_encode_percentage.values
),
(
"logins_store_read_query_time",
"timing_distribution",
@ -339,11 +369,31 @@ histograms AS (
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end.values
),
(
"performance_time_load_event_end_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_no_preload.values
),
(
"performance_time_load_event_end_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_preload.values
),
(
"performance_time_load_event_start",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start.values
),
(
"performance_time_load_event_start_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_no_preload.values
),
(
"performance_time_load_event_start_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_preload.values
),
(
"performance_time_response_start",
"timing_distribution",
@ -427,7 +477,7 @@ aggregated AS (
channel,
metric,
metric_type,
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
flattened_histograms
GROUP BY

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.activation_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.activation_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.activation_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.activation_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.baseline_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.baseline_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.baseline_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.baseline_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.bookmarks_sync_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.bookmarks_sync_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.bookmarks_sync_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.bookmarks_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.deletion_request_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.deletion_request_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.deletion_request_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.deletion_request_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.events_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.events_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.events_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.events_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.first_session_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.first_session_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.first_session_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.first_session_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.history_sync_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.history_sync_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.history_sync_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.history_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.installation_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.installation_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.installation_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.installation_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.logins_sync_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.logins_sync_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.logins_sync_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.logins_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.metrics_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.metrics_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.metrics_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.metrics_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL
@ -343,6 +343,41 @@ unlabeled_metrics AS (
'sum',
sum(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))
),
(
'glean_upload_pending_pings',
'counter',
'',
'avg',
avg(CAST(metrics.counter.glean_upload_pending_pings AS INT64))
),
(
'glean_upload_pending_pings',
'counter',
'',
'count',
IF(MIN(metrics.counter.glean_upload_pending_pings) IS NULL, NULL, COUNT(*))
),
(
'glean_upload_pending_pings',
'counter',
'',
'max',
max(CAST(metrics.counter.glean_upload_pending_pings AS INT64))
),
(
'glean_upload_pending_pings',
'counter',
'',
'min',
min(CAST(metrics.counter.glean_upload_pending_pings AS INT64))
),
(
'glean_upload_pending_pings',
'counter',
'',
'sum',
sum(CAST(metrics.counter.glean_upload_pending_pings AS INT64))
),
(
'glean_validation_app_forceclosed_count',
'counter',

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.migration_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.migration_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.migration_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.migration_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.startup_timeline_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.startup_timeline_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.startup_timeline_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.startup_timeline_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -1,4 +1,4 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_firefox_stable.sync_v1
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_stable.sync_v1
WITH extracted AS (
SELECT
*,
@ -13,7 +13,7 @@ WITH extracted AS (
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_firefox_stable.sync_v1`
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1;
-- Exposes every table matched by the clients_daily_histogram_aggregates*
-- wildcard, with submission_date taken from the partition time instead of the
-- stored column.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1`
AS
SELECT
  daily.* EXCEPT (submission_date),
  -- the stored submission_date is replaced by the date of the row's partition
  DATE(daily._PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix__clients_daily_histogram_aggregates*` AS daily

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1;
-- Exposes every table matched by the clients_daily_scalar_aggregates* wildcard,
-- with submission_date taken from the partition time instead of the stored
-- column.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1`
AS
SELECT
  daily.* EXCEPT (submission_date),
  -- the stored submission_date is replaced by the date of the row's partition
  DATE(daily._PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix__clients_daily_scalar_aggregates*` AS daily

Просмотреть файл

@ -1,6 +1,6 @@
-- init for org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1;
-- init for org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1;
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1`(
`glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1`(
sample_id INT64,
client_id STRING,
ping_type STRING,
@ -10,7 +10,6 @@ CREATE TABLE IF NOT EXISTS
channel STRING,
histogram_aggregates ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,

Просмотреть файл

@ -1,8 +1,7 @@
-- query for org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1;
-- query for org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1;
CREATE TEMP FUNCTION udf_merged_user_data(aggs ANY TYPE)
RETURNS ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
@ -19,24 +18,21 @@ RETURNS ARRAY<
),
aggregated_data AS (
SELECT AS STRUCT
latest_version,
metric,
metric_type,
key,
agg_type,
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
unnested
GROUP BY
latest_version,
latest_version,
metric,
metric_type,
key,
agg_type
)
SELECT
ARRAY_AGG((latest_version, metric, metric_type, key, agg_type, value))
ARRAY_AGG((metric, metric_type, key, agg_type, value))
FROM
aggregated_data
)
@ -46,7 +42,7 @@ WITH extracted_accumulated AS (
SELECT
*
FROM
glam_etl.org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1
glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1
WHERE
sample_id >= @min_sample_id
AND sample_id <= @max_sample_id
@ -63,23 +59,15 @@ filtered_accumulated AS (
histogram_aggregates
FROM
extracted_accumulated
LEFT JOIN
glam_etl.org_mozilla_fenix_glam_release__latest_versions_v1
USING
(channel)
WHERE
-- allow for builds to be slightly ahead of the current submission date, to
-- account for a reasonable amount of clock skew
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) < DATE_ADD(
@submission_date,
INTERVAL 3 day
)
mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
-- only keep builds from the last year
AND `moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) > DATE_SUB(
AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
@submission_date,
INTERVAL 365 day
)
AND app_version >= (latest_version - 2)
),
-- unnest the daily data
extracted_daily AS (
@ -88,7 +76,7 @@ extracted_daily AS (
CAST(app_version AS INT64) AS app_version,
unnested_histogram_aggregates AS histogram_aggregates
FROM
glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_histogram_aggregates_v1,
glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_histogram_aggregates_v1,
UNNEST(histogram_aggregates) unnested_histogram_aggregates
WHERE
submission_date = @submission_date
@ -104,27 +92,18 @@ filtered_daily AS (
app_version,
app_build_id,
channel,
latest_version,
histogram_aggregates.*
FROM
extracted_daily
LEFT JOIN
glam_etl.org_mozilla_fenix_glam_release__latest_versions_v1
USING
(channel)
WHERE
-- allow for builds to be slightly ahead of the current submission date, to
-- account for a reasonable amount of clock skew
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) < DATE_ADD(
@submission_date,
INTERVAL 3 day
)
mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
-- only keep builds from the last year
AND `moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) > DATE_SUB(
AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
@submission_date,
INTERVAL 365 day
)
AND app_version >= (latest_version - 2)
),
-- re-aggregate based on the latest version
aggregated_daily AS (
@ -136,12 +115,11 @@ aggregated_daily AS (
app_version,
app_build_id,
channel,
latest_version,
metric,
metric_type,
key,
agg_type,
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
filtered_daily
GROUP BY
@ -152,7 +130,6 @@ aggregated_daily AS (
app_version,
app_build_id,
channel,
latest_version,
metric,
metric_type,
key,
@ -170,13 +147,12 @@ transformed_daily AS (
channel,
ARRAY_AGG(
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
agg_type STRING,
aggregates ARRAY<STRUCT<key STRING, value INT64>>
>(latest_version, metric, metric_type, key, agg_type, value)
>(metric, metric_type, key, agg_type, value)
) AS histogram_aggregates
FROM
aggregated_daily

Просмотреть файл

@ -1,6 +1,6 @@
-- init for org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1;
-- init for org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1;
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1`(
`glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1`(
client_id STRING,
ping_type STRING,
os STRING,

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1;
-- query for org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1;
CREATE TEMP FUNCTION udf_merged_user_data(
aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>
)
@ -89,7 +89,7 @@ WITH filtered_date_channel AS (
SELECT
*
FROM
glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_scalar_aggregates_v1
glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_scalar_aggregates_v1
WHERE
submission_date = @submission_date
),
@ -130,23 +130,15 @@ version_filtered_new AS (
value
FROM
filtered_aggregates AS scalar_aggs
LEFT JOIN
glam_etl.org_mozilla_fenix_glam_release__latest_versions_v1
USING
(channel)
WHERE
-- allow for builds to be slightly ahead of the current submission date, to
-- account for a reasonable amount of clock skew
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) < DATE_ADD(
@submission_date,
INTERVAL 3 day
)
mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
-- only keep builds from the last year
AND `moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) > DATE_SUB(
AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
@submission_date,
INTERVAL 365 day
)
AND app_version >= (latest_version - 2)
),
scalar_aggregates_new AS (
SELECT
@ -213,24 +205,16 @@ filtered_old AS (
scalar_aggs.channel,
scalar_aggregates
FROM
glam_etl.org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1 AS scalar_aggs
LEFT JOIN
glam_etl.org_mozilla_fenix_glam_release__latest_versions_v1
USING
(channel)
glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1 AS scalar_aggs
WHERE
-- allow for builds to be slightly ahead of the current submission date, to
-- account for a reasonable amount of clock skew
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) < DATE_ADD(
@submission_date,
INTERVAL 3 day
)
mozfun.glam.build_hour_to_datetime(app_build_id) < DATE_ADD(@submission_date, INTERVAL 3 day)
-- only keep builds from the last year
AND `moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) > DATE_SUB(
AND mozfun.glam.build_hour_to_datetime(app_build_id) > DATE_SUB(
@submission_date,
INTERVAL 365 day
)
AND app_version >= (latest_version - 2)
),
joined_new_old AS (
SELECT

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__extract_probe_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__extract_probe_counts_v1;
CREATE TEMP FUNCTION udf_js_flatten(histogram ARRAY<STRUCT<key STRING, value FLOAT64>>)
RETURNS STRING DETERMINISTIC
LANGUAGE js
@ -17,8 +17,10 @@ SELECT
ping_type,
os,
app_build_id AS build_id,
SAFE_CAST(
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) AS STRING
IF(
app_build_id = "*",
"*",
SAFE_CAST(mozfun.glam.build_hour_to_datetime(app_build_id) AS STRING)
) AS build_date,
metric,
metric_type,
@ -31,7 +33,7 @@ SELECT
MAX(IF(agg_type = "histogram", udf_js_flatten(aggregates), NULL)) AS histogram,
MAX(IF(agg_type = "percentiles", udf_js_flatten(aggregates), NULL)) AS percentiles,
FROM
`glam_etl.org_mozilla_fenix_glam_release__view_probe_counts_v1`
`glam_etl.org_mozilla_fenix_glam_nightly__view_probe_counts_v1`
GROUP BY
channel,
app_version,

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__extract_user_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__extract_user_counts_v1;
WITH deduped AS (
SELECT
*,
@ -13,15 +13,17 @@ WITH deduped AS (
total_users DESC
) AS rank
FROM
`glam_etl.org_mozilla_fenix_glam_release__view_user_counts_v1`
`glam_etl.org_mozilla_fenix_glam_nightly__view_user_counts_v1`
)
SELECT
channel,
app_version,
coalesce(ping_type, "*") AS ping_type,
COALESCE(app_build_id, "*") AS app_build_id,
SAFE_CAST(
`moz-fx-data-shared-prod`.udf.fenix_build_to_datetime(app_build_id) AS STRING
IF(
app_build_id = "*",
"*",
SAFE_CAST(mozfun.glam.build_hour_to_datetime(app_build_id) AS STRING)
) AS build_date,
COALESCE(os, "*") AS os,
total_users

Просмотреть файл

@ -1,8 +1,7 @@
-- query for org_mozilla_fenix_glam_release__histogram_bucket_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1;
CREATE TEMP FUNCTION udf_merged_user_data(aggs ANY TYPE)
RETURNS ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
@ -19,24 +18,21 @@ RETURNS ARRAY<
),
aggregated_data AS (
SELECT AS STRUCT
latest_version,
metric,
metric_type,
key,
agg_type,
`moz-fx-data-shared-prod`.udf.map_sum(ARRAY_CONCAT_AGG(value)) AS value
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
FROM
unnested
GROUP BY
latest_version,
latest_version,
metric,
metric_type,
key,
agg_type
)
SELECT
ARRAY_AGG((latest_version, metric, metric_type, key, agg_type, value))
ARRAY_AGG((metric, metric_type, key, agg_type, value))
FROM
aggregated_data
)
@ -84,7 +80,6 @@ RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
CREATE TEMP FUNCTION udf_normalize_histograms(
arrs ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
@ -95,7 +90,6 @@ CREATE TEMP FUNCTION udf_normalize_histograms(
)
RETURNS ARRAY<
STRUCT<
latest_version INT64,
metric STRING,
metric_type STRING,
key STRING,
@ -106,7 +100,6 @@ RETURNS ARRAY<
(
WITH normalized AS (
SELECT
latest_version,
metric,
metric_type,
key,
@ -117,7 +110,7 @@ RETURNS ARRAY<
UNNEST(arrs)
)
SELECT
ARRAY_AGG((latest_version, metric, metric_type, key, agg_type, aggregates))
ARRAY_AGG((metric, metric_type, key, agg_type, aggregates))
FROM
normalized
)
@ -153,7 +146,7 @@ all_combos AS (
COALESCE(combo.os, table.os) AS os,
COALESCE(combo.app_build_id, table.app_build_id) AS app_build_id
FROM
glam_etl.org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1 table
glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1 table
CROSS JOIN
static_combos combo
),
@ -195,7 +188,6 @@ unnested AS (
app_version,
app_build_id,
channel,
histogram_aggregates.latest_version AS latest_version,
histogram_aggregates.metric AS metric,
histogram_aggregates.metric_type AS metric_type,
histogram_aggregates.key AS key,
@ -289,6 +281,38 @@ distribution_metadata AS (
5000 AS range_max,
50 AS bucket_count,
"exponential" AS histogram_type
),
STRUCT(
"custom_distribution" AS metric_type,
"js_baseline_compile_percentage" AS metric,
0 AS range_min,
100 AS range_max,
20 AS bucket_count,
"linear" AS histogram_type
),
STRUCT(
"custom_distribution" AS metric_type,
"js_delazification_percentage" AS metric,
0 AS range_min,
100 AS range_max,
20 AS bucket_count,
"linear" AS histogram_type
),
STRUCT(
"custom_distribution" AS metric_type,
"js_execution_percentage" AS metric,
0 AS range_min,
100 AS range_max,
20 AS bucket_count,
"linear" AS histogram_type
),
STRUCT(
"custom_distribution" AS metric_type,
"js_xdr_encode_percentage" AS metric,
0 AS range_min,
100 AS range_max,
20 AS bucket_count,
"linear" AS histogram_type
)
]
)
@ -315,7 +339,6 @@ records AS (
app_version,
app_build_id,
channel,
latest_version,
metric,
metric_type,
key,
@ -329,7 +352,6 @@ records AS (
app_version,
app_build_id,
channel,
latest_version,
metric,
metric_type,
key,

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__histogram_percentiles_v1;
-- query for org_mozilla_fenix_glam_nightly__histogram_percentiles_v1;
SELECT
* EXCEPT (aggregates) REPLACE('percentiles' AS agg_type),
ARRAY<STRUCT<key STRING, value FLOAT64>>[
@ -9,4 +9,4 @@ SELECT
('95', udf_js.glean_percentile(95, aggregates, metric_type))
] AS aggregates
FROM
glam_etl.org_mozilla_fenix_glam_release__histogram_probe_counts_v1
glam_etl.org_mozilla_fenix_glam_nightly__histogram_probe_counts_v1

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__histogram_probe_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__histogram_probe_counts_v1;
CREATE TEMP FUNCTION udf_exponential_buckets(min FLOAT64, max FLOAT64, nBuckets FLOAT64)
RETURNS ARRAY<FLOAT64> DETERMINISTIC
LANGUAGE js
@ -177,7 +177,7 @@ SELECT
udf_to_string_arr(udf_get_buckets(metric_type, range_min, range_max, bucket_count))
) AS aggregates
FROM
glam_etl.org_mozilla_fenix_glam_release__histogram_bucket_counts_v1
glam_etl.org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1
GROUP BY
ping_type,
os,

Просмотреть файл

@ -1,13 +1,15 @@
-- query for org_mozilla_fenix_glam_release__latest_versions_v1;
-- query for org_mozilla_fenix_glam_nightly__latest_versions_v1;
WITH extracted AS (
SELECT
client_id,
channel,
app_version
FROM
glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_scalar_aggregates_v1
glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_scalar_aggregates_v1
WHERE
submission_date > DATE_SUB(CURRENT_DATE(), INTERVAL 30 DAY)
submission_date
BETWEEN DATE_SUB(@submission_date, INTERVAL 28 DAY)
AND @submission_date
AND channel IS NOT NULL
),
transformed AS (
@ -20,7 +22,7 @@ transformed AS (
channel,
app_version
HAVING
COUNT(DISTINCT client_id) > 100
COUNT(DISTINCT client_id) > 5
ORDER BY
channel,
app_version DESC

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__scalar_bucket_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1;
CREATE TEMP FUNCTION udf_bucket(val FLOAT64)
RETURNS FLOAT64 AS (
-- Bucket `value` into a histogram with min_bucket, max_bucket and num_buckets
@ -471,7 +471,7 @@ all_combos AS (
COALESCE(combo.os, table.os) AS os,
COALESCE(combo.app_build_id, table.app_build_id) AS app_build_id
FROM
glam_etl.org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1 table
glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1 table
CROSS JOIN
static_combos combo
),

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__scalar_percentiles_v1;
-- query for org_mozilla_fenix_glam_nightly__scalar_percentiles_v1;
CREATE TEMP FUNCTION udf_get_values(required ARRAY<FLOAT64>, VALUES ARRAY<FLOAT64>)
RETURNS ARRAY<STRUCT<key STRING, value FLOAT64>> AS (
(
@ -22,7 +22,7 @@ WITH flat_clients_scalar_aggregates AS (
SELECT
* EXCEPT (scalar_aggregates)
FROM
glam_etl.org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1
glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1
CROSS JOIN
UNNEST(scalar_aggregates)
),

Просмотреть файл

@ -1,4 +1,4 @@
-- query for org_mozilla_fenix_glam_release__scalar_probe_counts_v1;
-- query for org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1;
CREATE TEMP FUNCTION udf_get_buckets()
RETURNS ARRAY<STRING> AS (
(
@ -336,7 +336,7 @@ SELECT
END
AS aggregates
FROM
glam_etl.org_mozilla_fenix_glam_release__scalar_bucket_counts_v1
glam_etl.org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1
GROUP BY
ping_type,
os,

Просмотреть файл

@ -0,0 +1,31 @@
-- Nightly GLAM view over the daily histogram aggregates: concatenates three
-- per-application views and rewrites the build id, channel, and app_version
-- columns.
CREATE OR REPLACE VIEW
  `glam-fenix-dev`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_histogram_aggregates_v1
AS
WITH nightly_sources AS (
  -- org_mozilla_fenix rows are kept only when fenix_app_info reports the
  -- 'nightly' channel for the build; the other two sources are taken whole.
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fenix__view_clients_daily_histogram_aggregates_v1
  WHERE
    mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'nightly'
  UNION ALL
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_histogram_aggregates_v1
  UNION ALL
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_histogram_aggregates_v1
)
SELECT
  src.* EXCEPT (app_build_id, channel, app_version),
  -- app_build_id is replaced by the output of fenix_build_to_build_hour
  mozfun.glam.fenix_build_to_build_hour(src.app_build_id) AS app_build_id,
  '*' AS channel,
  -- NOTE: the original app_version is discarded because it lacks semantic
  -- versioning; the source build id, cast to INT64, stands in as a
  -- placeholder. See https://github.com/mozilla/bigquery-etl/issues/1329
  SAFE_CAST(src.app_build_id AS INT64) AS app_version
FROM
  nightly_sources AS src

Просмотреть файл

@ -0,0 +1,31 @@
-- Nightly GLAM view over the daily scalar aggregates: concatenates three
-- per-application views and rewrites the build id, channel, and app_version
-- columns.
CREATE OR REPLACE VIEW
  `glam-fenix-dev`.glam_etl.org_mozilla_fenix_glam_nightly__view_clients_daily_scalar_aggregates_v1
AS
WITH nightly_sources AS (
  -- org_mozilla_fenix rows are kept only when fenix_app_info reports the
  -- 'nightly' channel for the build; the other two sources are taken whole.
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fenix__view_clients_daily_scalar_aggregates_v1
  WHERE
    mozfun.norm.fenix_app_info('org_mozilla_fenix', app_build_id).channel = 'nightly'
  UNION ALL
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_scalar_aggregates_v1
  UNION ALL
  SELECT
    *
  FROM
    `glam-fenix-dev`.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_scalar_aggregates_v1
)
SELECT
  src.* EXCEPT (app_build_id, channel, app_version),
  -- app_build_id is replaced by the output of fenix_build_to_build_hour
  mozfun.glam.fenix_build_to_build_hour(src.app_build_id) AS app_build_id,
  '*' AS channel,
  -- NOTE: the original app_version is discarded because it lacks semantic
  -- versioning; the source build id, cast to INT64, stands in as a
  -- placeholder. See https://github.com/mozilla/bigquery-etl/issues/1329
  SAFE_CAST(src.app_build_id AS INT64) AS app_version
FROM
  nightly_sources AS src

Просмотреть файл

@ -0,0 +1,29 @@
-- view for org_mozilla_fenix_glam_nightly__view_probe_counts_v1;
-- Concatenates the scalar and histogram probe-count tables with their
-- percentile counterparts. Each branch is SELECT *, so the four tables must
-- line up column-for-column; output column names come from the first branch.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__view_probe_counts_v1`
AS
-- scalar probe counts
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1`
UNION ALL
-- histogram probe counts
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__histogram_probe_counts_v1`
UNION ALL
-- scalar percentiles
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__scalar_percentiles_v1`
UNION ALL
-- histogram percentiles
SELECT
  *
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__histogram_percentiles_v1`

Просмотреть файл

@ -1,6 +1,6 @@
-- view for org_mozilla_fenix_glam_release__view_user_counts_v1;
-- view for org_mozilla_fenix_glam_nightly__view_user_counts_v1;
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__view_user_counts_v1`
`glam-fenix-dev.glam_etl.org_mozilla_fenix_glam_nightly__view_user_counts_v1`
AS
WITH all_clients AS (
SELECT
@ -11,7 +11,7 @@ WITH all_clients AS (
app_build_id,
channel
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__clients_scalar_aggregates_v1
`glam-fenix-dev`.glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1
UNION ALL
SELECT
client_id,
@ -21,7 +21,7 @@ WITH all_clients AS (
app_build_id,
channel
FROM
`moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__clients_histogram_aggregates_v1
`glam-fenix-dev`.glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1
),
-- Cross join with the attribute combinations to reduce the query complexity
-- with respect to the number of operations. A table with n rows cross joined

Просмотреть файл

@ -0,0 +1,523 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates --source-table org_mozilla_fenix_nightly_stable.metrics_v1
WITH extracted AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
"metrics" AS ping_type,
COALESCE(
SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
0
) AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_nightly_stable.metrics_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.client_id IS NOT NULL
),
histograms AS (
SELECT
sample_id,
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
"geckoview_content_process_lifetime",
"timing_distribution",
metrics.timing_distribution.geckoview_content_process_lifetime.values
),
(
"geckoview_document_site_origins",
"custom_distribution",
metrics.custom_distribution.geckoview_document_site_origins.values
),
(
"geckoview_page_load_progress_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_load_progress_time.values
),
(
"geckoview_page_load_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_load_time.values
),
(
"geckoview_page_reload_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_reload_time.values
),
(
"geckoview_per_document_site_origins",
"custom_distribution",
metrics.custom_distribution.geckoview_per_document_site_origins.values
),
(
"geckoview_startup_runtime",
"timing_distribution",
metrics.timing_distribution.geckoview_startup_runtime.values
),
(
"gfx_checkerboard_duration",
"timing_distribution",
metrics.timing_distribution.gfx_checkerboard_duration.values
),
(
"gfx_checkerboard_peak_pixel_count",
"custom_distribution",
metrics.custom_distribution.gfx_checkerboard_peak_pixel_count.values
),
(
"gfx_checkerboard_potential_duration",
"timing_distribution",
metrics.timing_distribution.gfx_checkerboard_potential_duration.values
),
(
"gfx_checkerboard_severity",
"custom_distribution",
metrics.custom_distribution.gfx_checkerboard_severity.values
),
(
"gfx_composite_time",
"timing_distribution",
metrics.timing_distribution.gfx_composite_time.values
),
(
"gfx_content_frame_time_from_paint",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_from_paint.values
),
(
"gfx_content_frame_time_from_vsync",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_from_vsync.values
),
(
"gfx_content_frame_time_with_svg",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_with_svg.values
),
(
"gfx_content_frame_time_without_resource_upload",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_without_resource_upload.values
),
(
"gfx_content_frame_time_without_upload",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_without_upload.values
),
(
"gfx_content_full_paint_time",
"timing_distribution",
metrics.timing_distribution.gfx_content_full_paint_time.values
),
(
"gfx_content_paint_time",
"timing_distribution",
metrics.timing_distribution.gfx_content_paint_time.values
),
(
"gfx_scroll_present_latency",
"timing_distribution",
metrics.timing_distribution.gfx_scroll_present_latency.values
),
(
"gfx_status_framebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_status_framebuild_time.values
),
(
"gfx_status_sceneswap_time",
"timing_distribution",
metrics.timing_distribution.gfx_status_sceneswap_time.values
),
(
"gfx_webrender_framebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_framebuild_time.values
),
(
"gfx_webrender_render_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_render_time.values
),
(
"gfx_webrender_scenebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_scenebuild_time.values
),
(
"gfx_webrender_sceneswap_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_sceneswap_time.values
),
(
"glean_database_size",
"memory_distribution",
metrics.memory_distribution.glean_database_size.values
),
(
"glean_upload_discarded_exceeding_pings_size",
"memory_distribution",
metrics.memory_distribution.glean_upload_discarded_exceeding_pings_size.values
),
(
"glean_upload_pending_pings_directory_size",
"memory_distribution",
metrics.memory_distribution.glean_upload_pending_pings_directory_size.values
),
(
"javascript_gc_compact_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_compact_time.values
),
(
"javascript_gc_mark_roots_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_mark_roots_time.values
),
(
"javascript_gc_mark_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_mark_time.values
),
(
"javascript_gc_minor_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_minor_time.values
),
(
"javascript_gc_prepare_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_prepare_time.values
),
(
"javascript_gc_slice_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_slice_time.values
),
(
"javascript_gc_sweep_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_sweep_time.values
),
(
"javascript_gc_total_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_total_time.values
),
(
"js_baseline_compile_percentage",
"custom_distribution",
metrics.custom_distribution.js_baseline_compile_percentage.values
),
(
"js_bytecode_caching_time",
"timing_distribution",
metrics.timing_distribution.js_bytecode_caching_time.values
),
(
"js_delazification_percentage",
"custom_distribution",
metrics.custom_distribution.js_delazification_percentage.values
),
(
"js_execution_percentage",
"custom_distribution",
metrics.custom_distribution.js_execution_percentage.values
),
(
"js_xdr_encode_percentage",
"custom_distribution",
metrics.custom_distribution.js_xdr_encode_percentage.values
),
(
"logins_store_read_query_time",
"timing_distribution",
metrics.timing_distribution.logins_store_read_query_time.values
),
(
"logins_store_unlock_time",
"timing_distribution",
metrics.timing_distribution.logins_store_unlock_time.values
),
(
"logins_store_write_query_time",
"timing_distribution",
metrics.timing_distribution.logins_store_write_query_time.values
),
(
"network_cache_hit_time",
"timing_distribution",
metrics.timing_distribution.network_cache_hit_time.values
),
(
"network_dns_end",
"timing_distribution",
metrics.timing_distribution.network_dns_end.values
),
(
"network_dns_start",
"timing_distribution",
metrics.timing_distribution.network_dns_start.values
),
(
"network_first_from_cache",
"timing_distribution",
metrics.timing_distribution.network_first_from_cache.values
),
(
"network_font_download_end",
"timing_distribution",
metrics.timing_distribution.network_font_download_end.values
),
(
"network_tcp_connection",
"timing_distribution",
metrics.timing_distribution.network_tcp_connection.values
),
(
"network_tls_handshake",
"timing_distribution",
metrics.timing_distribution.network_tls_handshake.values
),
(
"perf_awesomebar_bookmark_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_bookmark_suggestions.values
),
(
"perf_awesomebar_clipboard_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_clipboard_suggestions.values
),
(
"perf_awesomebar_history_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_history_suggestions.values
),
(
"perf_awesomebar_search_engine_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_search_engine_suggestions.values
),
(
"perf_awesomebar_session_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_session_suggestions.values
),
(
"perf_awesomebar_shortcuts_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_shortcuts_suggestions.values
),
(
"perf_awesomebar_synced_tabs_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_synced_tabs_suggestions.values
),
(
"performance_interaction_keypress_present_latency",
"timing_distribution",
metrics.timing_distribution.performance_interaction_keypress_present_latency.values
),
(
"performance_interaction_tab_switch_composite",
"timing_distribution",
metrics.timing_distribution.performance_interaction_tab_switch_composite.values
),
(
"performance_page_non_blank_paint",
"timing_distribution",
metrics.timing_distribution.performance_page_non_blank_paint.values
),
(
"performance_page_total_content_page_load",
"timing_distribution",
metrics.timing_distribution.performance_page_total_content_page_load.values
),
(
"performance_time_dom_complete",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_complete.values
),
(
"performance_time_dom_content_loaded_end",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_content_loaded_end.values
),
(
"performance_time_dom_content_loaded_start",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_content_loaded_start.values
),
(
"performance_time_dom_interactive",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_interactive.values
),
(
"performance_time_load_event_end",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end.values
),
(
"performance_time_load_event_end_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_no_preload.values
),
(
"performance_time_load_event_end_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_preload.values
),
(
"performance_time_load_event_start",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start.values
),
(
"performance_time_load_event_start_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_no_preload.values
),
(
"performance_time_load_event_start_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_preload.values
),
(
"performance_time_response_start",
"timing_distribution",
metrics.timing_distribution.performance_time_response_start.values
),
(
"places_manager_read_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_read_query_time.values
),
(
"places_manager_scan_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_scan_query_time.values
),
(
"places_manager_write_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_write_query_time.values
),
(
"storage_stats_app_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_app_bytes.values
),
(
"storage_stats_cache_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_cache_bytes.values
),
(
"storage_stats_data_dir_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_data_dir_bytes.values
),
(
"storage_stats_query_stats_duration",
"timing_distribution",
metrics.timing_distribution.storage_stats_query_stats_duration.values
)
] AS metadata
FROM
extracted
),
flattened_histograms AS (
  -- Expand the per-ping `metadata` array into one row per array element and
  -- keep only the elements whose `value` is not NULL.
  SELECT
    histograms.sample_id,
    histograms.client_id,
    histograms.ping_type,
    histograms.submission_date,
    histograms.os,
    histograms.app_version,
    histograms.app_build_id,
    histograms.channel,
    -- Struct expansion supplies the metric, metric_type and value columns
    -- that the next CTE reads.
    histogram.*
  FROM
    histograms
  CROSS JOIN
    UNNEST(histograms.metadata) AS histogram
  WHERE
    histogram.value IS NOT NULL
),
-- ARRAY_CONCAT_AGG may fail if the array of records exceeds 20 MB when
-- serialized and shuffled. This may manifest in a pathological case where
-- a single client sends *many* pings in a single day. However, this case
-- has not been observed. If it does occur, each histogram should be unnested
-- and aggregated instead. This will force more shuffles and is inefficient. It
-- may be mitigated by removing all of the empty entries which are sent to keep
-- bucket ranges contiguous.
--
-- Tested via org_mozilla_fenix.metrics_v1 for 2020-02-23, unnest vs concat
-- Slot consumed: 00:50:15 vs 00:06:45, Shuffled: 27.5 GB vs 6.0 GB
aggregated AS (
  -- One row per client, day, dimension set and metric. All `value` arrays in
  -- a group are concatenated and handed to mozfun.map.sum.
  -- NOTE(review): mozfun.map.sum presumably adds up values that share a key;
  -- confirm against the UDF definition.
  SELECT
    fh.sample_id,
    fh.client_id,
    fh.ping_type,
    fh.submission_date,
    fh.os,
    fh.app_version,
    fh.app_build_id,
    fh.channel,
    fh.metric,
    fh.metric_type,
    mozfun.map.sum(ARRAY_CONCAT_AGG(fh.value)) AS value
  FROM
    flattened_histograms AS fh
  GROUP BY
    fh.sample_id,
    fh.client_id,
    fh.ping_type,
    fh.submission_date,
    fh.os,
    fh.app_version,
    fh.app_build_id,
    fh.channel,
    fh.metric,
    fh.metric_type
)
-- Final output: one row per client, day and dimension set, with every
-- aggregated histogram packed into the `histogram_aggregates` array.
SELECT
  agg.sample_id,
  agg.client_id,
  agg.ping_type,
  agg.submission_date,
  agg.os,
  agg.app_version,
  agg.app_build_id,
  agg.channel,
  ARRAY_AGG(
    -- key is always the empty string and agg_type is always
    -- 'summed_histogram' for the rows produced here.
    STRUCT<
      metric STRING,
      metric_type STRING,
      key STRING,
      agg_type STRING,
      value ARRAY<STRUCT<key STRING, value INT64>>
    >(agg.metric, agg.metric_type, '', 'summed_histogram', agg.value)
  ) AS histogram_aggregates
FROM
  aggregated AS agg
GROUP BY
  agg.sample_id,
  agg.client_id,
  agg.ping_type,
  agg.submission_date,
  agg.os,
  agg.app_version,
  agg.app_build_id,
  agg.channel

Просмотреть файл

@ -0,0 +1,990 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fenix_nightly_stable.metrics_v1
WITH extracted AS (
  -- Source rows for a single submission date, with the grouping dimensions
  -- lifted out of client_info. Rows without a client_id are excluded.
  SELECT
    *,
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id,
    'metrics' AS ping_type,
    -- First dot-separated component of the display version as an integer;
    -- falls back to 0 when that component is missing or not numeric.
    IFNULL(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.os AS os,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.org_mozilla_fenix_nightly_stable.metrics_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
unlabeled_metrics AS (
  -- Per-client daily aggregates for unlabeled boolean, counter and quantity
  -- probes, emitted as one array of (metric, metric_type, key, agg_type, value).
  --   * boolean probes yield 'false' and 'true' entries: the number of rows in
  --     the group where the probe was FALSE / TRUE.
  --   * counter and quantity probes yield avg, count, max, min and sum. `count`
  --     is NULL when the probe is NULL on every row of the group; otherwise it
  --     is COUNT(*), i.e. it also counts rows where the probe is NULL.
  -- key is always the empty string in this CTE.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
      -- addons_has_enabled_addons (boolean)
      ('addons_has_enabled_addons', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.addons_has_enabled_addons AS INT64))),
      ('addons_has_enabled_addons', 'boolean', '', 'true', SUM(CAST(metrics.boolean.addons_has_enabled_addons AS INT64))),
      -- addons_has_installed_addons (boolean)
      ('addons_has_installed_addons', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.addons_has_installed_addons AS INT64))),
      ('addons_has_installed_addons', 'boolean', '', 'true', SUM(CAST(metrics.boolean.addons_has_installed_addons AS INT64))),
      -- events_total_uri_count (counter)
      ('events_total_uri_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'count', IF(MIN(metrics.counter.events_total_uri_count) IS NULL, NULL, COUNT(*))),
      ('events_total_uri_count', 'counter', '', 'max', MAX(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'min', MIN(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.events_total_uri_count AS INT64))),
      -- gfx_adapter_primary_ram (quantity)
      ('gfx_adapter_primary_ram', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_adapter_primary_ram) IS NULL, NULL, COUNT(*))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      -- gfx_display_count (quantity)
      ('gfx_display_count', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_count) IS NULL, NULL, COUNT(*))),
      ('gfx_display_count', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_count AS INT64))),
      -- gfx_display_primary_height (quantity)
      ('gfx_display_primary_height', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_primary_height) IS NULL, NULL, COUNT(*))),
      ('gfx_display_primary_height', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      -- gfx_display_primary_width (quantity)
      ('gfx_display_primary_width', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_primary_width) IS NULL, NULL, COUNT(*))),
      ('gfx_display_primary_width', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      -- gfx_status_headless (boolean)
      ('gfx_status_headless', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.gfx_status_headless AS INT64))),
      ('gfx_status_headless', 'boolean', '', 'true', SUM(CAST(metrics.boolean.gfx_status_headless AS INT64))),
      -- glean_core_migration_successful (boolean)
      ('glean_core_migration_successful', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_core_migration_successful AS INT64))),
      ('glean_core_migration_successful', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_core_migration_successful AS INT64))),
      -- glean_error_preinit_tasks_overflow (counter)
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'count', IF(MIN(metrics.counter.glean_error_preinit_tasks_overflow) IS NULL, NULL, COUNT(*))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      -- glean_error_preinit_tasks_timeout (boolean)
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      -- glean_upload_deleted_pings_after_quota_hit (counter)
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_deleted_pings_after_quota_hit) IS NULL, NULL, COUNT(*))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      -- glean_upload_pending_pings (counter)
      ('glean_upload_pending_pings', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_pending_pings) IS NULL, NULL, COUNT(*))),
      ('glean_upload_pending_pings', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      -- glean_validation_app_forceclosed_count (counter)
      ('glean_validation_app_forceclosed_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_app_forceclosed_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      -- glean_validation_baseline_ping_count (counter)
      ('glean_validation_baseline_ping_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_baseline_ping_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      -- logins_store_read_query_count (counter)
      ('logins_store_read_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_read_query_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_read_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      -- logins_store_unlock_count (counter)
      ('logins_store_unlock_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_unlock_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_unlock_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      -- logins_store_write_query_count (counter)
      ('logins_store_write_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_write_query_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_write_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      -- metrics_default_browser (boolean)
      ('metrics_default_browser', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_default_browser AS INT64))),
      ('metrics_default_browser', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_default_browser AS INT64))),
      -- metrics_has_open_tabs (boolean)
      ('metrics_has_open_tabs', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_open_tabs AS INT64))),
      ('metrics_has_open_tabs', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_open_tabs AS INT64))),
      -- metrics_has_recent_pwas (boolean)
      ('metrics_has_recent_pwas', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_recent_pwas AS INT64))),
      ('metrics_has_recent_pwas', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_recent_pwas AS INT64))),
      -- metrics_has_top_sites (boolean)
      ('metrics_has_top_sites', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_top_sites AS INT64))),
      ('metrics_has_top_sites', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_top_sites AS INT64))),
      -- metrics_recently_used_pwa_count (counter)
      ('metrics_recently_used_pwa_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_recently_used_pwa_count) IS NULL, NULL, COUNT(*))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      -- metrics_search_widget_installed (boolean)
      ('metrics_search_widget_installed', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_search_widget_installed AS INT64))),
      ('metrics_search_widget_installed', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_search_widget_installed AS INT64))),
      -- metrics_tabs_open_count (counter)
      ('metrics_tabs_open_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_tabs_open_count) IS NULL, NULL, COUNT(*))),
      ('metrics_tabs_open_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      -- metrics_top_sites_count (counter)
      ('metrics_top_sites_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_top_sites_count) IS NULL, NULL, COUNT(*))),
      ('metrics_top_sites_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      -- places_manager_read_query_count (counter)
      ('places_manager_read_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.places_manager_read_query_count) IS NULL, NULL, COUNT(*))),
      ('places_manager_read_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      -- places_manager_write_query_count (counter)
      ('places_manager_write_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.places_manager_write_query_count) IS NULL, NULL, COUNT(*))),
      ('places_manager_write_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.places_manager_write_query_count AS INT64)))
    ] AS scalar_aggregates
  FROM
    extracted
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel
),
grouped_labeled_metrics AS (
  -- Per ping: gather every labeled counter into one array of
  -- (name, type, value) so the next CTE can unnest them uniformly.
  -- Note that the output column is called `metrics`, the same name as the
  -- source record the tuples read from.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('browser_search_ad_clicks', 'labeled_counter', metrics.labeled_counter.browser_search_ad_clicks),
      ('browser_search_in_content', 'labeled_counter', metrics.labeled_counter.browser_search_in_content),
      ('browser_search_with_ads', 'labeled_counter', metrics.labeled_counter.browser_search_with_ads),
      ('crash_metrics_crash_count', 'labeled_counter', metrics.labeled_counter.crash_metrics_crash_count),
      ('gfx_content_frame_time_reason', 'labeled_counter', metrics.labeled_counter.gfx_content_frame_time_reason),
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value),
      ('glean_upload_ping_upload_failure', 'labeled_counter', metrics.labeled_counter.glean_upload_ping_upload_failure),
      ('logins_store_read_query_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_read_query_error_count),
      ('logins_store_unlock_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_unlock_error_count),
      ('logins_store_write_query_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_write_query_error_count),
      ('metrics_search_count', 'labeled_counter', metrics.labeled_counter.metrics_search_count),
      ('places_manager_read_query_error_count', 'labeled_counter', metrics.labeled_counter.places_manager_read_query_error_count),
      ('places_manager_write_query_error_count', 'labeled_counter', metrics.labeled_counter.places_manager_write_query_error_count)
    ] AS metrics
  FROM
    extracted
),
flattened_labeled_metrics AS (
  -- One row per (ping, labeled metric, label): first expand the array of
  -- metrics, then expand each metric's array of key/value entries.
  SELECT
    glm.client_id,
    glm.ping_type,
    glm.submission_date,
    glm.os,
    glm.app_version,
    glm.app_build_id,
    glm.channel,
    labeled_metric.name AS metric,
    labeled_metric.type AS metric_type,
    label.key AS key,
    label.value AS value
  FROM
    grouped_labeled_metrics AS glm
  CROSS JOIN
    UNNEST(glm.metrics) AS labeled_metric
  CROSS JOIN
    UNNEST(labeled_metric.value) AS label
),
aggregated_labeled_metrics AS (
  -- Per client, day, dimension set, metric and label: summary statistics of
  -- the label's value across all of the client's rows.
  SELECT
    flm.client_id,
    flm.ping_type,
    flm.submission_date,
    flm.os,
    flm.app_version,
    flm.app_build_id,
    flm.channel,
    flm.metric,
    flm.metric_type,
    flm.key,
    MAX(flm.value) AS max,
    MIN(flm.value) AS min,
    AVG(flm.value) AS avg,
    SUM(flm.value) AS sum,
    -- NULL when every value in the group is NULL; otherwise the number of rows
    -- in the group, including rows whose value is NULL.
    IF(COUNT(flm.value) > 0, COUNT(*), NULL) AS count
  FROM
    flattened_labeled_metrics AS flm
  GROUP BY
    flm.client_id,
    flm.ping_type,
    flm.submission_date,
    flm.os,
    flm.app_version,
    flm.app_build_id,
    flm.channel,
    flm.metric,
    flm.metric_type,
    flm.key
),
labeled_metrics AS (
  -- Repack the labeled statistics into the same scalar_aggregates array shape
  -- that unlabeled_metrics produces: five entries (max, min, avg, sum, count)
  -- per metric/label, concatenated per client, day and dimension set.
  SELECT
    alm.client_id,
    alm.ping_type,
    alm.submission_date,
    alm.os,
    alm.app_version,
    alm.app_build_id,
    alm.channel,
    ARRAY_CONCAT_AGG(
      ARRAY<
        STRUCT<
          metric STRING,
          metric_type STRING,
          key STRING,
          agg_type STRING,
          value FLOAT64
        >
      >[
        (alm.metric, alm.metric_type, alm.key, 'max', alm.max),
        (alm.metric, alm.metric_type, alm.key, 'min', alm.min),
        (alm.metric, alm.metric_type, alm.key, 'avg', alm.avg),
        (alm.metric, alm.metric_type, alm.key, 'sum', alm.sum),
        (alm.metric, alm.metric_type, alm.key, 'count', alm.count)
      ]
    ) AS scalar_aggregates
  FROM
    aggregated_labeled_metrics AS alm
  GROUP BY
    alm.client_id,
    alm.ping_type,
    alm.submission_date,
    alm.os,
    alm.app_version,
    alm.app_build_id,
    alm.channel
)
-- Final output: unlabeled and labeled aggregates stacked into one result.
-- UNION ALL pairs columns by position, so both branches spell out the same
-- column list explicitly instead of relying on `SELECT *` happening to produce
-- the two CTEs' columns in the same order.
SELECT
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel,
  scalar_aggregates
FROM
  labeled_metrics

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fenix_nightly__view_clients_daily_histogram_aggregates_v1;
-- View for histogram aggregates that handles time-partitioning.
-- Selects from every table matching the wildcard in the FROM clause and
-- exposes submission_date as the date of each row's _PARTITIONTIME rather than
-- the submission_date value stored in the table.
CREATE OR REPLACE VIEW
`glam-fenix-dev.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_histogram_aggregates_v1`
AS
SELECT
-- The stored column is excluded so the derived column below can reuse its name.
* EXCEPT (submission_date),
DATE(_PARTITIONTIME) AS submission_date
FROM
`glam-fenix-dev.glam_etl.org_mozilla_fenix_nightly__clients_daily_histogram_aggregates*`

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fenix_nightly__view_clients_daily_scalar_aggregates_v1;
-- View to union daily scalar aggregates with date partitioning.
-- Selects from every table matching the wildcard in the FROM clause and
-- exposes submission_date as the date of each row's _PARTITIONTIME rather than
-- the submission_date value stored in the table.
CREATE OR REPLACE VIEW
`glam-fenix-dev.glam_etl.org_mozilla_fenix_nightly__view_clients_daily_scalar_aggregates_v1`
AS
SELECT
-- The stored column is excluded so the derived column below can reuse its name.
* EXCEPT (submission_date),
DATE(_PARTITIONTIME) AS submission_date
FROM
`glam-fenix-dev.glam_etl.org_mozilla_fenix_nightly__clients_daily_scalar_aggregates*`

Просмотреть файл

@ -0,0 +1,523 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_histogram_aggregates --source-table org_mozilla_fennec_aurora_stable.metrics_v1
WITH extracted AS (
  -- Source rows for a single submission date, with the grouping dimensions
  -- lifted out of client_info. Rows without a client_id are excluded.
  SELECT
    *,
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id,
    'metrics' AS ping_type,
    -- First dot-separated component of the display version as an integer;
    -- falls back to 0 when that component is missing or not numeric.
    IFNULL(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.os AS os,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.org_mozilla_fennec_aurora_stable.metrics_v1`
  WHERE
    DATE(submission_timestamp) = @submission_date
    AND client_info.client_id IS NOT NULL
),
histograms AS (
SELECT
sample_id,
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
"geckoview_content_process_lifetime",
"timing_distribution",
metrics.timing_distribution.geckoview_content_process_lifetime.values
),
(
"geckoview_document_site_origins",
"custom_distribution",
metrics.custom_distribution.geckoview_document_site_origins.values
),
(
"geckoview_page_load_progress_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_load_progress_time.values
),
(
"geckoview_page_load_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_load_time.values
),
(
"geckoview_page_reload_time",
"timing_distribution",
metrics.timing_distribution.geckoview_page_reload_time.values
),
(
"geckoview_per_document_site_origins",
"custom_distribution",
metrics.custom_distribution.geckoview_per_document_site_origins.values
),
(
"geckoview_startup_runtime",
"timing_distribution",
metrics.timing_distribution.geckoview_startup_runtime.values
),
(
"gfx_checkerboard_duration",
"timing_distribution",
metrics.timing_distribution.gfx_checkerboard_duration.values
),
(
"gfx_checkerboard_peak_pixel_count",
"custom_distribution",
metrics.custom_distribution.gfx_checkerboard_peak_pixel_count.values
),
(
"gfx_checkerboard_potential_duration",
"timing_distribution",
metrics.timing_distribution.gfx_checkerboard_potential_duration.values
),
(
"gfx_checkerboard_severity",
"custom_distribution",
metrics.custom_distribution.gfx_checkerboard_severity.values
),
(
"gfx_composite_time",
"timing_distribution",
metrics.timing_distribution.gfx_composite_time.values
),
(
"gfx_content_frame_time_from_paint",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_from_paint.values
),
(
"gfx_content_frame_time_from_vsync",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_from_vsync.values
),
(
"gfx_content_frame_time_with_svg",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_with_svg.values
),
(
"gfx_content_frame_time_without_resource_upload",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_without_resource_upload.values
),
(
"gfx_content_frame_time_without_upload",
"custom_distribution",
metrics.custom_distribution.gfx_content_frame_time_without_upload.values
),
(
"gfx_content_full_paint_time",
"timing_distribution",
metrics.timing_distribution.gfx_content_full_paint_time.values
),
(
"gfx_content_paint_time",
"timing_distribution",
metrics.timing_distribution.gfx_content_paint_time.values
),
(
"gfx_scroll_present_latency",
"timing_distribution",
metrics.timing_distribution.gfx_scroll_present_latency.values
),
(
"gfx_status_framebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_status_framebuild_time.values
),
(
"gfx_status_sceneswap_time",
"timing_distribution",
metrics.timing_distribution.gfx_status_sceneswap_time.values
),
(
"gfx_webrender_framebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_framebuild_time.values
),
(
"gfx_webrender_render_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_render_time.values
),
(
"gfx_webrender_scenebuild_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_scenebuild_time.values
),
(
"gfx_webrender_sceneswap_time",
"timing_distribution",
metrics.timing_distribution.gfx_webrender_sceneswap_time.values
),
(
"glean_database_size",
"memory_distribution",
metrics.memory_distribution.glean_database_size.values
),
(
"glean_upload_discarded_exceeding_pings_size",
"memory_distribution",
metrics.memory_distribution.glean_upload_discarded_exceeding_pings_size.values
),
(
"glean_upload_pending_pings_directory_size",
"memory_distribution",
metrics.memory_distribution.glean_upload_pending_pings_directory_size.values
),
(
"javascript_gc_compact_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_compact_time.values
),
(
"javascript_gc_mark_roots_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_mark_roots_time.values
),
(
"javascript_gc_mark_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_mark_time.values
),
(
"javascript_gc_minor_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_minor_time.values
),
(
"javascript_gc_prepare_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_prepare_time.values
),
(
"javascript_gc_slice_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_slice_time.values
),
(
"javascript_gc_sweep_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_sweep_time.values
),
(
"javascript_gc_total_time",
"timing_distribution",
metrics.timing_distribution.javascript_gc_total_time.values
),
(
"js_baseline_compile_percentage",
"custom_distribution",
metrics.custom_distribution.js_baseline_compile_percentage.values
),
(
"js_bytecode_caching_time",
"timing_distribution",
metrics.timing_distribution.js_bytecode_caching_time.values
),
(
"js_delazification_percentage",
"custom_distribution",
metrics.custom_distribution.js_delazification_percentage.values
),
(
"js_execution_percentage",
"custom_distribution",
metrics.custom_distribution.js_execution_percentage.values
),
(
"js_xdr_encode_percentage",
"custom_distribution",
metrics.custom_distribution.js_xdr_encode_percentage.values
),
(
"logins_store_read_query_time",
"timing_distribution",
metrics.timing_distribution.logins_store_read_query_time.values
),
(
"logins_store_unlock_time",
"timing_distribution",
metrics.timing_distribution.logins_store_unlock_time.values
),
(
"logins_store_write_query_time",
"timing_distribution",
metrics.timing_distribution.logins_store_write_query_time.values
),
(
"network_cache_hit_time",
"timing_distribution",
metrics.timing_distribution.network_cache_hit_time.values
),
(
"network_dns_end",
"timing_distribution",
metrics.timing_distribution.network_dns_end.values
),
(
"network_dns_start",
"timing_distribution",
metrics.timing_distribution.network_dns_start.values
),
(
"network_first_from_cache",
"timing_distribution",
metrics.timing_distribution.network_first_from_cache.values
),
(
"network_font_download_end",
"timing_distribution",
metrics.timing_distribution.network_font_download_end.values
),
(
"network_tcp_connection",
"timing_distribution",
metrics.timing_distribution.network_tcp_connection.values
),
(
"network_tls_handshake",
"timing_distribution",
metrics.timing_distribution.network_tls_handshake.values
),
(
"perf_awesomebar_bookmark_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_bookmark_suggestions.values
),
(
"perf_awesomebar_clipboard_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_clipboard_suggestions.values
),
(
"perf_awesomebar_history_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_history_suggestions.values
),
(
"perf_awesomebar_search_engine_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_search_engine_suggestions.values
),
(
"perf_awesomebar_session_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_session_suggestions.values
),
(
"perf_awesomebar_shortcuts_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_shortcuts_suggestions.values
),
(
"perf_awesomebar_synced_tabs_suggestions",
"timing_distribution",
metrics.timing_distribution.perf_awesomebar_synced_tabs_suggestions.values
),
(
"performance_interaction_keypress_present_latency",
"timing_distribution",
metrics.timing_distribution.performance_interaction_keypress_present_latency.values
),
(
"performance_interaction_tab_switch_composite",
"timing_distribution",
metrics.timing_distribution.performance_interaction_tab_switch_composite.values
),
(
"performance_page_non_blank_paint",
"timing_distribution",
metrics.timing_distribution.performance_page_non_blank_paint.values
),
(
"performance_page_total_content_page_load",
"timing_distribution",
metrics.timing_distribution.performance_page_total_content_page_load.values
),
(
"performance_time_dom_complete",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_complete.values
),
(
"performance_time_dom_content_loaded_end",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_content_loaded_end.values
),
(
"performance_time_dom_content_loaded_start",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_content_loaded_start.values
),
(
"performance_time_dom_interactive",
"timing_distribution",
metrics.timing_distribution.performance_time_dom_interactive.values
),
(
"performance_time_load_event_end",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end.values
),
(
"performance_time_load_event_end_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_no_preload.values
),
(
"performance_time_load_event_end_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_end_preload.values
),
(
"performance_time_load_event_start",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start.values
),
(
"performance_time_load_event_start_no_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_no_preload.values
),
(
"performance_time_load_event_start_preload",
"timing_distribution",
metrics.timing_distribution.performance_time_load_event_start_preload.values
),
(
"performance_time_response_start",
"timing_distribution",
metrics.timing_distribution.performance_time_response_start.values
),
(
"places_manager_read_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_read_query_time.values
),
(
"places_manager_scan_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_scan_query_time.values
),
(
"places_manager_write_query_time",
"timing_distribution",
metrics.timing_distribution.places_manager_write_query_time.values
),
(
"storage_stats_app_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_app_bytes.values
),
(
"storage_stats_cache_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_cache_bytes.values
),
(
"storage_stats_data_dir_bytes",
"memory_distribution",
metrics.memory_distribution.storage_stats_data_dir_bytes.values
),
(
"storage_stats_query_stats_duration",
"timing_distribution",
metrics.timing_distribution.storage_stats_query_stats_duration.values
)
] AS metadata
FROM
extracted
),
-- Explode the per-row `metadata` array so that every array element becomes its
-- own row, carrying the client/build dimensions along with it. Elements whose
-- `value` is NULL are dropped.
flattened_histograms AS (
  SELECT
    sample_id,
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    histogram_entry.*
  FROM
    histograms
  CROSS JOIN
    UNNEST(metadata) AS histogram_entry
  WHERE
    histogram_entry.value IS NOT NULL
),
-- ARRAY_CONCAT_AGG may fail if the array of records exceeds 20 MB when
-- serialized and shuffled. This may exhibit itself in a pathological case where
-- a single client sends *many* pings in a single day. However, this case
-- has not been observed. If this does occur, each histogram should be unnested
-- and aggregated. This will force more shuffles and is inefficient. This may be
-- mitigated by removing all of the empty entries which are sent to keep bucket
-- ranges contiguous.
--
-- Tested via org_mozilla_fenix.metrics_v1 for 2020-02-23, unnest vs concat
-- Slot consumed: 00:50:15 vs 00:06:45, Shuffled: 27.5GB vs 6.0 GB
aggregated AS (
  -- One row per client dimension set and metric: the `value` arrays of all
  -- matching rows are concatenated and handed to mozfun.map.sum
  -- (presumably a per-key sum of the entries -- confirm against mozfun docs).
  SELECT
    sample_id,
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
  FROM
    flattened_histograms
  GROUP BY
    -- client / build dimensions
    sample_id,
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    -- probe identity
    metric,
    metric_type
)
-- Final shape: one row per client dimension set, with every aggregated
-- histogram packed into the `histogram_aggregates` array. The key is always ''
-- and the agg_type is always 'summed_histogram' in this query.
SELECT
  sample_id,
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel,
  ARRAY_AGG(
    STRUCT<
      metric STRING,
      metric_type STRING,
      key STRING,
      agg_type STRING,
      value ARRAY<STRUCT<key STRING, value INT64>>
    >(metric, metric_type, '', 'summed_histogram', value)
  ) AS histogram_aggregates
FROM
  aggregated
GROUP BY
  sample_id,
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel

Просмотреть файл

@ -0,0 +1,990 @@
-- Query generated by: python3 -m bigquery_etl.glam.clients_daily_scalar_aggregates --source-table org_mozilla_fennec_aurora_stable.metrics_v1
WITH extracted AS (
  -- Rows of the source metrics table for the requested @submission_date,
  -- with the grouping dimensions lifted out of client_info into top-level
  -- columns. Rows without a client_id are discarded.
  SELECT
    *,
    DATE(submission_timestamp) AS submission_date,
    client_info.client_id AS client_id,
    "metrics" AS ping_type,
    -- First dot-separated component of the display version as an integer;
    -- falls back to 0 when it is missing or not numeric.
    COALESCE(
      SAFE_CAST(SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS INT64),
      0
    ) AS app_version,
    client_info.os AS os,
    client_info.app_build AS app_build_id,
    client_info.app_channel AS channel
  FROM
    `moz-fx-data-shared-prod.org_mozilla_fennec_aurora_stable.metrics_v1`
  WHERE
    client_info.client_id IS NOT NULL
    AND DATE(submission_timestamp) = @submission_date
),
unlabeled_metrics AS (
  -- Roll-up of every unlabeled probe, one row per client dimension set.
  -- Each array element is (metric, metric_type, key, agg_type, value):
  --   * boolean probes emit a 'false' and a 'true' tally;
  --   * counter and quantity probes emit avg / count / max / min / sum, where
  --     'count' is the number of rows in the group, or NULL when the probe is
  --     NULL on every row of the group.
  -- The key is always '' here.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
      ('addons_has_enabled_addons', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.addons_has_enabled_addons AS INT64))),
      ('addons_has_enabled_addons', 'boolean', '', 'true', SUM(CAST(metrics.boolean.addons_has_enabled_addons AS INT64))),
      ('addons_has_installed_addons', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.addons_has_installed_addons AS INT64))),
      ('addons_has_installed_addons', 'boolean', '', 'true', SUM(CAST(metrics.boolean.addons_has_installed_addons AS INT64))),
      ('events_total_uri_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'count', IF(MIN(metrics.counter.events_total_uri_count) IS NULL, NULL, COUNT(*))),
      ('events_total_uri_count', 'counter', '', 'max', MAX(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'min', MIN(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('events_total_uri_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.events_total_uri_count AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_adapter_primary_ram) IS NULL, NULL, COUNT(*))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_adapter_primary_ram', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))),
      ('gfx_display_count', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_count) IS NULL, NULL, COUNT(*))),
      ('gfx_display_count', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_count', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_count AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_primary_height) IS NULL, NULL, COUNT(*))),
      ('gfx_display_primary_height', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_height', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_primary_height AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'avg', AVG(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'count', IF(MIN(metrics.quantity.gfx_display_primary_width) IS NULL, NULL, COUNT(*))),
      ('gfx_display_primary_width', 'quantity', '', 'max', MAX(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'min', MIN(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_display_primary_width', 'quantity', '', 'sum', SUM(CAST(metrics.quantity.gfx_display_primary_width AS INT64))),
      ('gfx_status_headless', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.gfx_status_headless AS INT64))),
      ('gfx_status_headless', 'boolean', '', 'true', SUM(CAST(metrics.boolean.gfx_status_headless AS INT64))),
      ('glean_core_migration_successful', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_core_migration_successful AS INT64))),
      ('glean_core_migration_successful', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_core_migration_successful AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'count', IF(MIN(metrics.counter.glean_error_preinit_tasks_overflow) IS NULL, NULL, COUNT(*))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_overflow', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))),
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      ('glean_error_preinit_tasks_timeout', 'boolean', '', 'true', SUM(CAST(metrics.boolean.glean_error_preinit_tasks_timeout AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_deleted_pings_after_quota_hit) IS NULL, NULL, COUNT(*))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_deleted_pings_after_quota_hit', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_deleted_pings_after_quota_hit AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'count', IF(MIN(metrics.counter.glean_upload_pending_pings) IS NULL, NULL, COUNT(*))),
      ('glean_upload_pending_pings', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_upload_pending_pings', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_upload_pending_pings AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_app_forceclosed_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_app_forceclosed_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'count', IF(MIN(metrics.counter.glean_validation_baseline_ping_count) IS NULL, NULL, COUNT(*))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'max', MAX(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'min', MIN(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('glean_validation_baseline_ping_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_read_query_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_read_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_read_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_read_query_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_unlock_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_unlock_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_unlock_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_unlock_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.logins_store_write_query_count) IS NULL, NULL, COUNT(*))),
      ('logins_store_write_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('logins_store_write_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.logins_store_write_query_count AS INT64))),
      ('metrics_default_browser', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_default_browser AS INT64))),
      ('metrics_default_browser', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_default_browser AS INT64))),
      ('metrics_has_open_tabs', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_open_tabs AS INT64))),
      ('metrics_has_open_tabs', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_open_tabs AS INT64))),
      ('metrics_has_recent_pwas', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_recent_pwas AS INT64))),
      ('metrics_has_recent_pwas', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_recent_pwas AS INT64))),
      ('metrics_has_top_sites', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_has_top_sites AS INT64))),
      ('metrics_has_top_sites', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_has_top_sites AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_recently_used_pwa_count) IS NULL, NULL, COUNT(*))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_recently_used_pwa_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_recently_used_pwa_count AS INT64))),
      ('metrics_search_widget_installed', 'boolean', '', 'false', SUM(CAST(NOT metrics.boolean.metrics_search_widget_installed AS INT64))),
      ('metrics_search_widget_installed', 'boolean', '', 'true', SUM(CAST(metrics.boolean.metrics_search_widget_installed AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_tabs_open_count) IS NULL, NULL, COUNT(*))),
      ('metrics_tabs_open_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_tabs_open_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_tabs_open_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'count', IF(MIN(metrics.counter.metrics_top_sites_count) IS NULL, NULL, COUNT(*))),
      ('metrics_top_sites_count', 'counter', '', 'max', MAX(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'min', MIN(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('metrics_top_sites_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.metrics_top_sites_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.places_manager_read_query_count) IS NULL, NULL, COUNT(*))),
      ('places_manager_read_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_read_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.places_manager_read_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'avg', AVG(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'count', IF(MIN(metrics.counter.places_manager_write_query_count) IS NULL, NULL, COUNT(*))),
      ('places_manager_write_query_count', 'counter', '', 'max', MAX(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'min', MIN(CAST(metrics.counter.places_manager_write_query_count AS INT64))),
      ('places_manager_write_query_count', 'counter', '', 'sum', SUM(CAST(metrics.counter.places_manager_write_query_count AS INT64)))
    ] AS scalar_aggregates
  FROM
    extracted
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel
),
grouped_labeled_metrics AS (
  -- For each extracted row, gather every labeled counter into one array of
  -- (name, type, value) structs, where `value` is that probe's array of
  -- key/value entries. No aggregation happens here.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
      ('browser_search_ad_clicks', 'labeled_counter', metrics.labeled_counter.browser_search_ad_clicks),
      ('browser_search_in_content', 'labeled_counter', metrics.labeled_counter.browser_search_in_content),
      ('browser_search_with_ads', 'labeled_counter', metrics.labeled_counter.browser_search_with_ads),
      ('crash_metrics_crash_count', 'labeled_counter', metrics.labeled_counter.crash_metrics_crash_count),
      ('gfx_content_frame_time_reason', 'labeled_counter', metrics.labeled_counter.gfx_content_frame_time_reason),
      ('glean_error_invalid_label', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_label),
      ('glean_error_invalid_overflow', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_overflow),
      ('glean_error_invalid_state', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_state),
      ('glean_error_invalid_value', 'labeled_counter', metrics.labeled_counter.glean_error_invalid_value),
      ('glean_upload_ping_upload_failure', 'labeled_counter', metrics.labeled_counter.glean_upload_ping_upload_failure),
      ('logins_store_read_query_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_read_query_error_count),
      ('logins_store_unlock_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_unlock_error_count),
      ('logins_store_write_query_error_count', 'labeled_counter', metrics.labeled_counter.logins_store_write_query_error_count),
      ('metrics_search_count', 'labeled_counter', metrics.labeled_counter.metrics_search_count),
      ('places_manager_read_query_error_count', 'labeled_counter', metrics.labeled_counter.places_manager_read_query_error_count),
      ('places_manager_write_query_error_count', 'labeled_counter', metrics.labeled_counter.places_manager_write_query_error_count)
    ] AS metrics
  FROM
    extracted
),
flattened_labeled_metrics AS (
  -- Two-level explode: first one row per labeled probe, then one row per
  -- key/value entry recorded for that probe.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    labeled_probe.name AS metric,
    labeled_probe.type AS metric_type,
    label_entry.key AS key,
    label_entry.value AS value
  FROM
    grouped_labeled_metrics
  CROSS JOIN
    UNNEST(metrics) AS labeled_probe
  CROSS JOIN
    UNNEST(labeled_probe.value) AS label_entry
),
aggregated_labeled_metrics AS (
  -- Summary statistics per client dimension set, probe and label key.
  -- `count` is the number of rows in the group, or NULL when every value in
  -- the group is NULL.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    metric,
    metric_type,
    key,
    MAX(value) AS max,
    MIN(value) AS min,
    AVG(value) AS avg,
    SUM(value) AS sum,
    IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
  FROM
    flattened_labeled_metrics
  GROUP BY
    -- client / build dimensions
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    -- probe identity and label
    metric,
    metric_type,
    key
),
labeled_metrics AS (
  -- Pivot the five statistic columns of each (metric, key) row into five
  -- (metric, metric_type, key, agg_type, value) structs, then concatenate them
  -- into one `scalar_aggregates` array per client dimension set. The struct
  -- layout and column list match unlabeled_metrics.
  SELECT
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel,
    ARRAY_CONCAT_AGG(
      ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
        (metric, metric_type, key, 'max', max),
        (metric, metric_type, key, 'min', min),
        (metric, metric_type, key, 'avg', avg),
        (metric, metric_type, key, 'sum', sum),
        (metric, metric_type, key, 'count', count)
      ]
    ) AS scalar_aggregates
  FROM
    aggregated_labeled_metrics
  GROUP BY
    client_id,
    ping_type,
    submission_date,
    os,
    app_version,
    app_build_id,
    channel
)
-- Stack the unlabeled and labeled roll-ups. UNION ALL pairs columns by
-- position, so both branches spell out the same eight columns in the same
-- order (exactly the columns each CTE defines).
SELECT
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel,
  scalar_aggregates
FROM
  unlabeled_metrics
UNION ALL
SELECT
  client_id,
  ping_type,
  submission_date,
  os,
  app_version,
  app_build_id,
  channel,
  scalar_aggregates
FROM
  labeled_metrics

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fennec_aurora__view_clients_daily_histogram_aggregates_v1;
-- Exposes every table matching the clients_daily_histogram_aggregates*
-- wildcard as one relation. The stored submission_date column is dropped and
-- re-derived from _PARTITIONTIME, so it ends up as the last column.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_histogram_aggregates_v1`
AS
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fennec_aurora__clients_daily_histogram_aggregates*`

Просмотреть файл

@ -0,0 +1,10 @@
-- view for org_mozilla_fennec_aurora__view_clients_daily_scalar_aggregates_v1;
-- Exposes every table matching the clients_daily_scalar_aggregates* wildcard
-- as one relation. The stored submission_date column is dropped and
-- re-derived from _PARTITIONTIME, so it ends up as the last column.
CREATE OR REPLACE VIEW
  `glam-fenix-dev.glam_etl.org_mozilla_fennec_aurora__view_clients_daily_scalar_aggregates_v1`
AS
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `glam-fenix-dev.glam_etl.org_mozilla_fennec_aurora__clients_daily_scalar_aggregates*`

Просмотреть файл

@ -1,14 +0,0 @@
-- Re-publishes the org_mozilla_firefox daily histogram aggregates view with
-- the channel column overwritten by the literal "*". The replacement channel
-- column is appended last.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_histogram_aggregates_v1
AS
SELECT
  * EXCEPT (channel),
  "*" AS channel
FROM
  `moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox__view_clients_daily_histogram_aggregates_v1

Просмотреть файл

@ -1,14 +0,0 @@
-- Re-publishes the org_mozilla_firefox daily scalar aggregates view with the
-- channel column overwritten by the literal "*". The replacement channel
-- column is appended last.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod`.glam_etl.org_mozilla_fenix_glam_release__view_clients_daily_scalar_aggregates_v1
AS
SELECT
  * EXCEPT (channel),
  "*" AS channel
FROM
  `moz-fx-data-shared-prod`.glam_etl.org_mozilla_firefox__view_clients_daily_scalar_aggregates_v1

Просмотреть файл

@ -1,29 +0,0 @@
-- view for org_mozilla_fenix_glam_release__view_probe_counts_v1;
-- Stacks the four probe-count / percentile tables into a single relation.
-- UNION ALL pairs columns by position and takes the column names from the
-- first branch, so the branch order below is significant.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__view_probe_counts_v1`
AS
SELECT
  *
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__scalar_probe_counts_v1`
UNION ALL
SELECT
  *
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__histogram_probe_counts_v1`
UNION ALL
SELECT
  *
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__scalar_percentiles_v1`
UNION ALL
SELECT
  *
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_fenix_glam_release__histogram_percentiles_v1`

Просмотреть файл

@ -1,10 +0,0 @@
-- view for org_mozilla_firefox__view_clients_daily_histogram_aggregates_v1;
-- Exposes every table matching the clients_daily_histogram_aggregates*
-- wildcard as one relation. The stored submission_date column is dropped and
-- re-derived from _PARTITIONTIME, so it ends up as the last column.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_firefox__view_clients_daily_histogram_aggregates_v1`
AS
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_firefox__clients_daily_histogram_aggregates*`

Просмотреть файл

@ -1,10 +0,0 @@
-- view for org_mozilla_firefox__view_clients_daily_scalar_aggregates_v1;
-- Exposes every table matching the clients_daily_scalar_aggregates* wildcard
-- as one relation. The stored submission_date column is dropped and
-- re-derived from _PARTITIONTIME, so it ends up as the last column.
CREATE OR REPLACE VIEW
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_firefox__view_clients_daily_scalar_aggregates_v1`
AS
SELECT
  * EXCEPT (submission_date),
  DATE(_PARTITIONTIME) AS submission_date
FROM
  `moz-fx-data-shared-prod.glam_etl.org_mozilla_firefox__clients_daily_scalar_aggregates*`