Bug 1610983 - Add clients daily scalar aggregates for GLAM in Fenix (#724)

* Add copy of clients_daily_scalar_aggregates for fenix

* Change table to Fenix metrics ping and modify columns

* Modify get_scalar_probes to fetch the relevant list of metrics

* Remove logic for keyed booleans

* Add valid generated SQL for scalars

* Generate valid keyed_scalars

* Factor out attributes into reusable string

* Use the bigquery-etl formatter

* Add `--no-parameterize` flag for debugging in console

* Add option for table_id

* Add comma conditionally

* Add script to run against all Glean pings in dataset

* Move scripts into appropriate locations

* Use stable tables as source for generate script

* Report glean metric types instead of scalar/keyed-scalar

* Fix linting

* Add script to generate sql for each table in org_mozilla_fenix

* Add generated sql

* Rename script for running etl in testing environment

* Update run script to use generated sql

* Fix missing --table-id parameter

* Update header comment in script

* Update generated sql

* Add ping_type to list of attributes

* Update generated schemas
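
As a usage sketch (module path, flags, and the default table id are the ones introduced in this commit), a single query can be printed to the console without query parameters:

    # Illustrative invocation: print an unparameterized scalars query for the Fenix metrics ping
    python3 -m bigquery_etl.glam.glean_scalar_aggregates \
        --agg-type scalars \
        --table-id org_mozilla_fenix_stable.metrics_v1 \
        --no-parameterize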
Anthony Miyaguchi 2020-02-06 14:01:25 -08:00, committed by GitHub
Parent 4e8f7de3ba
Commit f32f866129
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
21 changed files: 3754 additions and 0 deletions

@@ -0,0 +1,319 @@
#!/usr/bin/env python3
"""clients_daily_scalar_aggregates query generator."""
import argparse
import json
import subprocess
import sys
from typing import Dict, List
from bigquery_etl.format_sql.formatter import reformat

parser = argparse.ArgumentParser()
parser.add_argument(
"--agg-type", type=str, help="One of scalar/keyed-scalar", required=True
)
parser.add_argument(
"--no-parameterize", action="store_true", help="Generate a query without parameters"
)
parser.add_argument(
"--table-id",
type=str,
help="Name of Glean table",
default="org_mozilla_fenix_stable.metrics_v1",
)
ATTRIBUTES = ",".join(
[
"client_id",
"ping_type",
"submission_date",
"os",
"app_version",
"app_build_id",
"channel",
]
)


def generate_sql(
table_id,
agg_type,
aggregates,
additional_queries,
additional_partitions,
select_clause,
querying_table,
no_parameterize=False,
):
"""Create a SQL query for the clients_daily_scalar_aggregates dataset."""
# TODO: What is the right granularity for Fenix versioning?
# TODO: Channels have a different meaning in Glean, what should the set be?
# If set to 1 day, then runs of copy_deduplicate may not be done yet
date = (
"date_sub(current_date, interval 2 day)"
if no_parameterize
else "@submission_date"
)
comment = (
"Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates "
f"--agg-type {agg_type} --table-id {table_id}"
+ (" --no-parameterize" if no_parameterize else "")
)
return f"""-- {comment}
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") as ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.{table_id}`
WHERE
DATE(submission_timestamp) = {date}
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
{additional_queries}
aggregated AS (
SELECT
{ATTRIBUTES},
{aggregates}
FROM {querying_table}
GROUP BY
{ATTRIBUTES}
{"," if additional_partitions else ""}
{additional_partitions})
{select_clause}
"""


def _get_generic_keyed_scalar_sql(probes, value_type):
probes_struct = []
for metric_type, probes in probes.items():
for probe in probes:
probes_struct.append(
f"('{probe}', '{metric_type}', metrics.{metric_type}.{probe})"
)
probes_struct.sort()
probes_arr = ",\n".join(probes_struct)
additional_queries = f"""
grouped_metrics AS
(SELECT
{ATTRIBUTES},
ARRAY<STRUCT<
name STRING,
type STRING,
value ARRAY<STRUCT<key STRING, value {value_type}>>
>>[
{probes_arr}
] as metrics
FROM filtered),
flattened_metrics AS
(SELECT
{ATTRIBUTES},
metrics.name AS metric,
metrics.type as metric_type,
value.key AS key,
value.value AS value
FROM grouped_metrics
CROSS JOIN UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value),
"""
return {
"additional_queries": additional_queries,
"additional_partitions": "metric, metric_type, key",
"querying_table": "flattened_metrics",
}


def get_keyed_scalar_probes_sql_string(probes):
"""Put together the subsets of SQL required to query keyed scalars."""
sql_strings = _get_generic_keyed_scalar_sql(probes, "INT64")
sql_strings[
"probes_string"
] = """
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
"""
sql_strings[
"select_clause"
] = f"""
SELECT
{ATTRIBUTES},
ARRAY_CONCAT_AGG(ARRAY<STRUCT<
metric STRING,
metric_type STRING,
key STRING,
agg_type STRING,
value FLOAT64
>>
[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM aggregated
GROUP BY
{ATTRIBUTES}
"""
return sql_strings


def get_scalar_probes_sql_strings(
probes: Dict[str, List[str]], scalar_type: str
) -> Dict[str, str]:
"""Put together the subsets of SQL required to query scalars or booleans."""
if scalar_type == "keyed_scalars":
return get_keyed_scalar_probes_sql_string(
{"labeled_counter": probes["labeled_counter"]}
)
probe_structs = []
for probe in probes.pop("boolean", []):
probe_structs.append(
(
f"('{probe}', 'boolean', '', 'false', "
f"SUM(case when metrics.boolean.{probe} = False "
"THEN 1 ELSE 0 END))"
)
)
probe_structs.append(
(
f"('{probe}', 'boolean', '', 'true', "
f"SUM(case when metrics.boolean.{probe} = True "
"THEN 1 ELSE 0 END))"
)
)
for metric_type, probes in probes.items():
for probe in probes:
for agg_func in ["max", "avg", "min", "sum"]:
probe_structs.append(
(
f"('{probe}', '{metric_type}', '', '{agg_func}', "
f"{agg_func}(CAST(metrics.{metric_type}.{probe} AS INT64)))"
)
)
probe_structs.append(
f"('{probe}', '{metric_type}', '', 'count', "
f"IF(MIN(metrics.{metric_type}.{probe}) IS NULL, NULL, COUNT(*)))"
)
probe_structs.sort()
probes_arr = ",\n".join(probe_structs)
probes_string = f"""
ARRAY<STRUCT<
metric STRING,
metric_type STRING,
key STRING,
agg_type STRING,
value FLOAT64
>> [
{probes_arr}
] AS scalar_aggregates
"""
select_clause = f"""
SELECT *
FROM aggregated
"""
return {"probes_string": probes_string, "select_clause": select_clause}


def get_schema(table: str, project: str = "moz-fx-data-shared-prod"):
"""Return the dictionary representation of the BigQuery table schema.
This returns types in the legacy SQL format.
"""
process = subprocess.Popen(
["bq", "show", "--schema", "--format=json", f"{project}:{table}"],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
stdout, stderr = process.communicate()
if process.returncode > 0:
raise Exception(
f"Call to bq exited non-zero: {process.returncode}", stdout, stderr
)
return json.loads(stdout)


def get_scalar_probes(schema: Dict, scalar_type: str) -> Dict[str, List[str]]:
"""Find all scalar probes in a Glean table.
Metric types are defined in the Glean documentation found here:
https://mozilla.github.io/glean/book/user/metrics/index.html
"""
metric_type_set = {
"scalars": ["boolean", "counter", "quantity"],
"keyed_scalars": ["labeled_counter"],
}
scalars = {metric_type: [] for metric_type in metric_type_set[scalar_type]}
# Iterate over every element in the schema under the metrics section and
# collect a list of metric names.
for root_field in schema:
if root_field["name"] != "metrics":
continue
for metric_field in root_field["fields"]:
metric_type = metric_field["name"]
if metric_type not in metric_type_set[scalar_type]:
continue
for field in metric_field["fields"]:
scalars[metric_type].append(field["name"])
return scalars


def main(argv, out=print):
"""Print a clients_daily_scalar_aggregates query to stdout."""
opts = vars(parser.parse_args(argv[1:]))
sql_string = ""
scalar_type = opts["agg_type"]
if scalar_type not in ("scalars", "keyed_scalars"):
raise ValueError("agg-type must be one of scalars, keyed_scalars")
table_id = opts["table_id"]
schema = get_schema(table_id)
scalar_probes = get_scalar_probes(schema, scalar_type)
sql_string = get_scalar_probes_sql_strings(scalar_probes, scalar_type)
out(
reformat(
generate_sql(
table_id,
scalar_type,
sql_string["probes_string"],
sql_string.get("additional_queries", ""),
sql_string.get("additional_partitions", ""),
sql_string["select_clause"],
sql_string.get("querying_table", "filtered"),
no_parameterize=opts["no_parameterize"],
)
)
)


if __name__ == "__main__":
main(sys.argv)
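
For reference, get_schema above shells out to bq, so roughly the same metric list that get_scalar_probes extracts for the counter type can be reproduced on the command line; the jq filter below is only an illustrative sketch of that traversal (jq is already assumed by the generate script):

    # Hypothetical one-liner mirroring get_schema + get_scalar_probes for "counter" metrics
    bq show --schema --format=json moz-fx-data-shared-prod:org_mozilla_fenix_stable.metrics_v1 \
        | jq '[.[] | select(.name == "metrics") | .fields[] | select(.name == "counter") | .fields[].name]'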

script/glam/generate_fenix_sql Executable file
@@ -0,0 +1,28 @@
#!/bin/bash
set -e
cd "$(dirname "$0")/../.."
project=moz-fx-data-shared-prod
dataset=org_mozilla_fenix_stable
# e.g. baseline_v1
tables=$(bq ls --format=json $project:$dataset | \
jq -r '.[] | .tableReference.tableId')

function write_sql {
local table=$1
local type=$2
local directory="sql/glam_etl/fenix_clients_daily_${type}_aggregates_${table}"
mkdir -p "$directory"
python3 -m bigquery_etl.glam.glean_scalar_aggregates \
--agg-type "$type" \
--table-id "$dataset.$table" \
> "$directory/query.sql"
echo "generated $directory/query.sql"
}

for table in $tables; do
write_sql "$table" scalars
write_sql "$table" keyed_scalars
done
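
For orientation, the script above should leave one query.sql per ping table and aggregate type; a hypothetical resulting layout (the real list depends on what bq ls returns):

    sql/glam_etl/fenix_clients_daily_scalars_aggregates_baseline_v1/query.sql
    sql/glam_etl/fenix_clients_daily_keyed_scalars_aggregates_baseline_v1/query.sql
    sql/glam_etl/fenix_clients_daily_scalars_aggregates_metrics_v1/query.sql
    sql/glam_etl/fenix_clients_daily_keyed_scalars_aggregates_metrics_v1/query.sql
    ...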

script/glam/run_fenix_sql Executable file
@@ -0,0 +1,42 @@
#!/bin/bash
# Run the scalars sql job against all Glean pings in a product
set -e
cd "$(dirname "$0")/../.."

# date is not consistent across macos and linux
function yesterday {
python3 - <<EOD
from datetime import date, timedelta
dt = date.today() - timedelta(1)
print(dt.strftime("%Y-%m-%d"))
EOD
}

original_project=$(gcloud config get-value project)
function cleanup {
gcloud config set project $original_project
}
trap cleanup EXIT
project="glam-fenix-dev"
dataset="glam_etl_fenix_test"
gcloud config set project $project
# force delete the dataset
bq rm -r -f $dataset
bq mk $dataset

# run against previously generated etl
for query in sql/glam_etl/fenix_clients_daily*scalars*/query.sql; do
echo "running $query"
bq query \
--max_rows=0 \
--use_legacy_sql=false \
--append_table \
--project_id=$project \
--dataset_id=$dataset \
--destination_table=fenix_clients_daily_scalar_aggregates_v1 \
--parameter=submission_date:DATE:"$(yesterday)" \
< $query
done

@@ -0,0 +1,129 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.activation_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.activation_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,130 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.baseline_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.baseline_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
),
('metrics_search_count', 'labeled_counter', metrics.labeled_counter.metrics_search_count)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,144 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.bookmarks_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.bookmarks_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'bookmarks_sync_incoming',
'labeled_counter',
metrics.labeled_counter.bookmarks_sync_incoming
),
(
'bookmarks_sync_outgoing',
'labeled_counter',
metrics.labeled_counter.bookmarks_sync_outgoing
),
(
'bookmarks_sync_remote_tree_problems',
'labeled_counter',
metrics.labeled_counter.bookmarks_sync_remote_tree_problems
),
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,129 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.deletion_request_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.deletion_request_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,129 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.events_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.events_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,131 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.history_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.history_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
),
('history_sync_incoming', 'labeled_counter', metrics.labeled_counter.history_sync_incoming),
('history_sync_outgoing', 'labeled_counter', metrics.labeled_counter.history_sync_outgoing)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,131 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.logins_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.logins_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
),
('logins_sync_incoming', 'labeled_counter', metrics.labeled_counter.logins_sync_incoming),
('logins_sync_outgoing', 'labeled_counter', metrics.labeled_counter.logins_sync_outgoing)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,155 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.metrics_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.metrics_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'crash_metrics_crash_count',
'labeled_counter',
metrics.labeled_counter.crash_metrics_crash_count
),
(
'gfx_content_frame_time_reason',
'labeled_counter',
metrics.labeled_counter.gfx_content_frame_time_reason
),
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
),
(
'logins_store_read_query_error_count',
'labeled_counter',
metrics.labeled_counter.logins_store_read_query_error_count
),
(
'logins_store_unlock_error_count',
'labeled_counter',
metrics.labeled_counter.logins_store_unlock_error_count
),
(
'logins_store_write_query_error_count',
'labeled_counter',
metrics.labeled_counter.logins_store_write_query_error_count
),
('metrics_search_count', 'labeled_counter', metrics.labeled_counter.metrics_search_count)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,144 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type keyed_scalars --table-id org_mozilla_fenix_stable.migration_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.migration_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
grouped_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<name STRING, type STRING, value ARRAY<STRUCT<key STRING, value INT64>>>>[
(
'glean_error_invalid_label',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_label
),
(
'glean_error_invalid_overflow',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_overflow
),
(
'glean_error_invalid_state',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_state
),
(
'glean_error_invalid_value',
'labeled_counter',
metrics.labeled_counter.glean_error_invalid_value
),
(
'migration_bookmarks_migrated',
'labeled_counter',
metrics.labeled_counter.migration_bookmarks_migrated
),
(
'migration_history_migrated',
'labeled_counter',
metrics.labeled_counter.migration_history_migrated
),
(
'migration_logins_failure_counts',
'labeled_counter',
metrics.labeled_counter.migration_logins_failure_counts
)
] AS metrics
FROM
filtered
),
flattened_metrics AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metrics.name AS metric,
metrics.type AS metric_type,
value.key AS key,
value.value AS value
FROM
grouped_metrics
CROSS JOIN
UNNEST(metrics) AS metrics,
UNNEST(metrics.value) AS value
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key,
MAX(value) AS max,
MIN(value) AS min,
AVG(value) AS avg,
SUM(value) AS sum,
IF(MIN(value) IS NULL, NULL, COUNT(*)) AS count
FROM
flattened_metrics
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
metric,
metric_type,
key
)
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY_CONCAT_AGG(
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(metric, metric_type, key, 'max', max),
(metric, metric_type, key, 'min', min),
(metric, metric_type, key, 'avg', avg),
(metric, metric_type, key, 'sum', sum),
(metric, metric_type, key, 'count', count)
]
) AS scalar_aggregates
FROM
aggregated
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel

@@ -0,0 +1,44 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.activation_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.activation_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,114 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.baseline_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.baseline_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(
'events_total_uri_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.events_total_uri_count) IS NULL, NULL, COUNT(*))
),
(
'events_total_uri_count',
'counter',
'',
'max',
max(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'min',
min(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'glean_validation_metrics_ping_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))
),
(
'glean_validation_metrics_ping_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.glean_validation_metrics_ping_count) IS NULL, NULL, COUNT(*))
),
(
'glean_validation_metrics_ping_count',
'counter',
'',
'max',
max(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))
),
(
'glean_validation_metrics_ping_count',
'counter',
'',
'min',
min(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))
),
(
'glean_validation_metrics_ping_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.glean_validation_metrics_ping_count AS INT64))
)
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,79 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.bookmarks_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.bookmarks_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(
'bookmarks_sync_outgoing_batches',
'counter',
'',
'avg',
avg(CAST(metrics.counter.bookmarks_sync_outgoing_batches AS INT64))
),
(
'bookmarks_sync_outgoing_batches',
'counter',
'',
'count',
IF(MIN(metrics.counter.bookmarks_sync_outgoing_batches) IS NULL, NULL, COUNT(*))
),
(
'bookmarks_sync_outgoing_batches',
'counter',
'',
'max',
max(CAST(metrics.counter.bookmarks_sync_outgoing_batches AS INT64))
),
(
'bookmarks_sync_outgoing_batches',
'counter',
'',
'min',
min(CAST(metrics.counter.bookmarks_sync_outgoing_batches AS INT64))
),
(
'bookmarks_sync_outgoing_batches',
'counter',
'',
'sum',
sum(CAST(metrics.counter.bookmarks_sync_outgoing_batches AS INT64))
)
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,44 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.deletion_request_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.deletion_request_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,44 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.events_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.events_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,79 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.history_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.history_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(
'history_sync_outgoing_batches',
'counter',
'',
'avg',
avg(CAST(metrics.counter.history_sync_outgoing_batches AS INT64))
),
(
'history_sync_outgoing_batches',
'counter',
'',
'count',
IF(MIN(metrics.counter.history_sync_outgoing_batches) IS NULL, NULL, COUNT(*))
),
(
'history_sync_outgoing_batches',
'counter',
'',
'max',
max(CAST(metrics.counter.history_sync_outgoing_batches AS INT64))
),
(
'history_sync_outgoing_batches',
'counter',
'',
'min',
min(CAST(metrics.counter.history_sync_outgoing_batches AS INT64))
),
(
'history_sync_outgoing_batches',
'counter',
'',
'sum',
sum(CAST(metrics.counter.history_sync_outgoing_batches AS INT64))
)
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,79 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.logins_sync_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.logins_sync_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(
'logins_sync_outgoing_batches',
'counter',
'',
'avg',
avg(CAST(metrics.counter.logins_sync_outgoing_batches AS INT64))
),
(
'logins_sync_outgoing_batches',
'counter',
'',
'count',
IF(MIN(metrics.counter.logins_sync_outgoing_batches) IS NULL, NULL, COUNT(*))
),
(
'logins_sync_outgoing_batches',
'counter',
'',
'max',
max(CAST(metrics.counter.logins_sync_outgoing_batches AS INT64))
),
(
'logins_sync_outgoing_batches',
'counter',
'',
'min',
min(CAST(metrics.counter.logins_sync_outgoing_batches AS INT64))
),
(
'logins_sync_outgoing_batches',
'counter',
'',
'sum',
sum(CAST(metrics.counter.logins_sync_outgoing_batches AS INT64))
)
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

@@ -0,0 +1,485 @@
-- Query generated by: python3 -m bigquery_etl.glam.glean_scalar_aggregates --agg-type scalars --table-id org_mozilla_fenix_stable.metrics_v1
WITH filtered AS (
SELECT
*,
DATE(submission_timestamp) AS submission_date,
client_info.client_id,
REPLACE(ping_info.ping_type, "_", "-") AS ping_type,
SPLIT(client_info.app_display_version, '.')[OFFSET(0)] AS app_version,
client_info.os AS os,
client_info.app_build AS app_build_id,
client_info.app_channel AS channel
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.metrics_v1`
WHERE
DATE(submission_timestamp) = @submission_date
AND client_info.app_channel IN ("release", "fenixProduction")
AND client_info.client_id IS NOT NULL
),
aggregated AS (
SELECT
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel,
ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>[
(
'events_total_uri_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.events_total_uri_count) IS NULL, NULL, COUNT(*))
),
(
'events_total_uri_count',
'counter',
'',
'max',
max(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'min',
min(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'events_total_uri_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.events_total_uri_count AS INT64))
),
(
'gfx_adapter_primary_ram',
'quantity',
'',
'avg',
avg(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))
),
(
'gfx_adapter_primary_ram',
'quantity',
'',
'count',
IF(MIN(metrics.quantity.gfx_adapter_primary_ram) IS NULL, NULL, COUNT(*))
),
(
'gfx_adapter_primary_ram',
'quantity',
'',
'max',
max(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))
),
(
'gfx_adapter_primary_ram',
'quantity',
'',
'min',
min(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))
),
(
'gfx_adapter_primary_ram',
'quantity',
'',
'sum',
sum(CAST(metrics.quantity.gfx_adapter_primary_ram AS INT64))
),
(
'gfx_display_count',
'quantity',
'',
'avg',
avg(CAST(metrics.quantity.gfx_display_count AS INT64))
),
(
'gfx_display_count',
'quantity',
'',
'count',
IF(MIN(metrics.quantity.gfx_display_count) IS NULL, NULL, COUNT(*))
),
(
'gfx_display_count',
'quantity',
'',
'max',
max(CAST(metrics.quantity.gfx_display_count AS INT64))
),
(
'gfx_display_count',
'quantity',
'',
'min',
min(CAST(metrics.quantity.gfx_display_count AS INT64))
),
(
'gfx_display_count',
'quantity',
'',
'sum',
sum(CAST(metrics.quantity.gfx_display_count AS INT64))
),
(
'gfx_display_primary_height',
'quantity',
'',
'avg',
avg(CAST(metrics.quantity.gfx_display_primary_height AS INT64))
),
(
'gfx_display_primary_height',
'quantity',
'',
'count',
IF(MIN(metrics.quantity.gfx_display_primary_height) IS NULL, NULL, COUNT(*))
),
(
'gfx_display_primary_height',
'quantity',
'',
'max',
max(CAST(metrics.quantity.gfx_display_primary_height AS INT64))
),
(
'gfx_display_primary_height',
'quantity',
'',
'min',
min(CAST(metrics.quantity.gfx_display_primary_height AS INT64))
),
(
'gfx_display_primary_height',
'quantity',
'',
'sum',
sum(CAST(metrics.quantity.gfx_display_primary_height AS INT64))
),
(
'gfx_display_primary_width',
'quantity',
'',
'avg',
avg(CAST(metrics.quantity.gfx_display_primary_width AS INT64))
),
(
'gfx_display_primary_width',
'quantity',
'',
'count',
IF(MIN(metrics.quantity.gfx_display_primary_width) IS NULL, NULL, COUNT(*))
),
(
'gfx_display_primary_width',
'quantity',
'',
'max',
max(CAST(metrics.quantity.gfx_display_primary_width AS INT64))
),
(
'gfx_display_primary_width',
'quantity',
'',
'min',
min(CAST(metrics.quantity.gfx_display_primary_width AS INT64))
),
(
'gfx_display_primary_width',
'quantity',
'',
'sum',
sum(CAST(metrics.quantity.gfx_display_primary_width AS INT64))
),
(
'gfx_status_headless',
'boolean',
'',
'false',
SUM(CASE WHEN metrics.boolean.gfx_status_headless = FALSE THEN 1 ELSE 0 END)
),
(
'gfx_status_headless',
'boolean',
'',
'true',
SUM(CASE WHEN metrics.boolean.gfx_status_headless = TRUE THEN 1 ELSE 0 END)
),
(
'glean_core_migration_successful',
'boolean',
'',
'false',
SUM(CASE WHEN metrics.boolean.glean_core_migration_successful = FALSE THEN 1 ELSE 0 END)
),
(
'glean_core_migration_successful',
'boolean',
'',
'true',
SUM(CASE WHEN metrics.boolean.glean_core_migration_successful = TRUE THEN 1 ELSE 0 END)
),
(
'glean_error_preinit_tasks_overflow',
'counter',
'',
'avg',
avg(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))
),
(
'glean_error_preinit_tasks_overflow',
'counter',
'',
'count',
IF(MIN(metrics.counter.glean_error_preinit_tasks_overflow) IS NULL, NULL, COUNT(*))
),
(
'glean_error_preinit_tasks_overflow',
'counter',
'',
'max',
max(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))
),
(
'glean_error_preinit_tasks_overflow',
'counter',
'',
'min',
min(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))
),
(
'glean_error_preinit_tasks_overflow',
'counter',
'',
'sum',
sum(CAST(metrics.counter.glean_error_preinit_tasks_overflow AS INT64))
),
(
'glean_error_preinit_tasks_timeout',
'boolean',
'',
'false',
SUM(CASE WHEN metrics.boolean.glean_error_preinit_tasks_timeout = FALSE THEN 1 ELSE 0 END)
),
(
'glean_error_preinit_tasks_timeout',
'boolean',
'',
'true',
SUM(CASE WHEN metrics.boolean.glean_error_preinit_tasks_timeout = TRUE THEN 1 ELSE 0 END)
),
(
'glean_validation_app_forceclosed_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))
),
(
'glean_validation_app_forceclosed_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.glean_validation_app_forceclosed_count) IS NULL, NULL, COUNT(*))
),
(
'glean_validation_app_forceclosed_count',
'counter',
'',
'max',
max(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))
),
(
'glean_validation_app_forceclosed_count',
'counter',
'',
'min',
min(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))
),
(
'glean_validation_app_forceclosed_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.glean_validation_app_forceclosed_count AS INT64))
),
(
'glean_validation_baseline_ping_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))
),
(
'glean_validation_baseline_ping_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.glean_validation_baseline_ping_count) IS NULL, NULL, COUNT(*))
),
(
'glean_validation_baseline_ping_count',
'counter',
'',
'max',
max(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))
),
(
'glean_validation_baseline_ping_count',
'counter',
'',
'min',
min(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))
),
(
'glean_validation_baseline_ping_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.glean_validation_baseline_ping_count AS INT64))
),
(
'logins_store_read_query_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.logins_store_read_query_count AS INT64))
),
(
'logins_store_read_query_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.logins_store_read_query_count) IS NULL, NULL, COUNT(*))
),
(
'logins_store_read_query_count',
'counter',
'',
'max',
max(CAST(metrics.counter.logins_store_read_query_count AS INT64))
),
(
'logins_store_read_query_count',
'counter',
'',
'min',
min(CAST(metrics.counter.logins_store_read_query_count AS INT64))
),
(
'logins_store_read_query_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.logins_store_read_query_count AS INT64))
),
(
'logins_store_unlock_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.logins_store_unlock_count AS INT64))
),
(
'logins_store_unlock_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.logins_store_unlock_count) IS NULL, NULL, COUNT(*))
),
(
'logins_store_unlock_count',
'counter',
'',
'max',
max(CAST(metrics.counter.logins_store_unlock_count AS INT64))
),
(
'logins_store_unlock_count',
'counter',
'',
'min',
min(CAST(metrics.counter.logins_store_unlock_count AS INT64))
),
(
'logins_store_unlock_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.logins_store_unlock_count AS INT64))
),
(
'logins_store_write_query_count',
'counter',
'',
'avg',
avg(CAST(metrics.counter.logins_store_write_query_count AS INT64))
),
(
'logins_store_write_query_count',
'counter',
'',
'count',
IF(MIN(metrics.counter.logins_store_write_query_count) IS NULL, NULL, COUNT(*))
),
(
'logins_store_write_query_count',
'counter',
'',
'max',
max(CAST(metrics.counter.logins_store_write_query_count AS INT64))
),
(
'logins_store_write_query_count',
'counter',
'',
'min',
min(CAST(metrics.counter.logins_store_write_query_count AS INT64))
),
(
'logins_store_write_query_count',
'counter',
'',
'sum',
sum(CAST(metrics.counter.logins_store_write_query_count AS INT64))
),
(
'metrics_default_browser',
'boolean',
'',
'false',
SUM(CASE WHEN metrics.boolean.metrics_default_browser = FALSE THEN 1 ELSE 0 END)
),
(
'metrics_default_browser',
'boolean',
'',
'true',
SUM(CASE WHEN metrics.boolean.metrics_default_browser = TRUE THEN 1 ELSE 0 END)
)
] AS scalar_aggregates
FROM
filtered
GROUP BY
client_id,
ping_type,
submission_date,
os,
app_version,
app_build_id,
channel
)
SELECT
*
FROM
aggregated

The diff for one file is not shown because of its large size.