Add baseline clients daily test (#1941)

* Update table_names_from_baseline to strip project

* Remove project ids from query to facilitate testing

* Rewrite require_partition_filter in tests

* Add basic tests for baseline clients daily
This commit is contained in:
Anthony Miyaguchi 2021-04-08 05:39:28 -07:00 коммит произвёл GitHub
Родитель 39071e8229
Коммит 459f64576c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
22 изменённых файлов: 1504 добавлений и 27 удалений

Просмотреть файл

@ -27,11 +27,13 @@ def run_query(
project_id, baseline_table, date, dry_run, output_dir=None, output_only=False
):
"""Process a single table, potentially also writing out the generated queries."""
tables = table_names_from_baseline(baseline_table)
tables = table_names_from_baseline(baseline_table, include_project_id=False)
daily_table = tables["daily_table"]
daily_view = tables["daily_view"]
render_kwargs = dict(header="-- Generated via bigquery_etl.glean_usage\n")
render_kwargs = dict(
header="-- Generated via bigquery_etl.glean_usage\n", project_id=project_id
)
render_kwargs.update(tables)
job_kwargs = dict(use_legacy_sql=False, dry_run=dry_run)
@ -55,7 +57,7 @@ def run_query(
else:
# Table exists, so we will run the incremental query.
job_kwargs.update(
destination=f"{daily_table}${date.strftime('%Y%m%d')}",
destination=f"{project_id}.{daily_table}${date.strftime('%Y%m%d')}",
write_disposition=WriteDisposition.WRITE_TRUNCATE,
query_parameters=[ScalarQueryParameter("submission_date", "DATE", date)],
)

Просмотреть файл

@ -29,12 +29,13 @@ def run_query(
project_id, baseline_table, date, dry_run, output_dir=None, output_only=False
):
"""Process a single table, potentially also writing out the generated queries."""
tables = table_names_from_baseline(baseline_table)
tables = table_names_from_baseline(baseline_table, include_project_id=False)
table_id = tables["first_seen_table"]
view_id = tables["first_seen_view"]
render_kwargs = dict(
header="-- Generated via bigquery_etl.glean_usage\n",
project_id=project_id,
# do not match on org_mozilla_firefoxreality
fennec_id=any(
(f"{app_id}_stable" in baseline_table)
@ -47,11 +48,7 @@ def run_query(
]
),
)
render_kwargs.update(
# Remove the project from the table name, which is implicit in the
# query. It also doesn't play well with tests.
{key: ".".join(table_id.split(".")[1:]) for key, table_id in tables.items()}
)
render_kwargs.update(tables)
job_kwargs = dict(use_legacy_sql=False, dry_run=dry_run)
query_sql = render(QUERY_FILENAME, **render_kwargs)
@ -74,7 +71,7 @@ def run_query(
else:
# Table exists, so just overwrite the entire table with the day's results
job_kwargs.update(
destination=table_id,
destination=f"{project_id}.{table_id}",
write_disposition=WriteDisposition.WRITE_TRUNCATE,
query_parameters=[ScalarQueryParameter("submission_date", "DATE", date)],
)

Просмотреть файл

@ -29,12 +29,14 @@ def run_query(
project_id, baseline_table, date, dry_run, output_dir=None, output_only=False
):
"""Process a single table, potentially also writing out the generated queries."""
tables = table_names_from_baseline(baseline_table)
tables = table_names_from_baseline(baseline_table, include_project_id=False)
last_seen_table = tables["last_seen_table"]
last_seen_view = tables["last_seen_view"]
render_kwargs = dict(
header="-- Generated via bigquery_etl.glean_usage\n", usage_types=USAGE_TYPES
header="-- Generated via bigquery_etl.glean_usage\n",
project_id=project_id,
usage_types=USAGE_TYPES,
)
render_kwargs.update(tables)
job_kwargs = dict(use_legacy_sql=False, dry_run=dry_run)
@ -61,7 +63,7 @@ def run_query(
else:
# Table exists, so we will run the incremental query.
job_kwargs.update(
destination=f"{last_seen_table}${date.strftime('%Y%m%d')}",
destination=f"{project_id}.{last_seen_table}${date.strftime('%Y%m%d')}",
write_disposition=WriteDisposition.WRITE_TRUNCATE,
query_parameters=[ScalarQueryParameter("submission_date", "DATE", date)],
)

Просмотреть файл

@ -78,14 +78,16 @@ def list_baseline_tables(project_id, only_tables, table_filter):
]
def table_names_from_baseline(baseline_table):
def table_names_from_baseline(baseline_table, include_project_id=True):
"""Return a dict with full table IDs for derived tables and views.
:param baseline_table: stable table ID in project.dataset.table form
"""
prefix = re.sub(r"_stable\..+", "", baseline_table)
if not include_project_id:
prefix = ".".join(prefix.split(".")[1:])
return dict(
baseline_table=baseline_table,
baseline_table=f"{prefix}_stable.baseline_v1",
migration_table=f"{prefix}_stable.migration_v1",
daily_table=f"{prefix}_derived.baseline_clients_daily_v1",
last_seen_table=f"{prefix}_derived.baseline_clients_last_seen_v1",

Просмотреть файл

@ -6,4 +6,4 @@ AS
SELECT
*
FROM
`{{ daily_table }}`
`{{ project_id }}.{{ daily_table }}`

Просмотреть файл

@ -6,4 +6,4 @@ AS
SELECT
*
FROM
`{{ first_seen_table }}`
`{{ project_id }}.{{ first_seen_table }}`

Просмотреть файл

@ -9,4 +9,4 @@ SELECT
{% endfor %}
*
FROM
`{{ last_seen_table }}`
`{{ project_id }}.{{ last_seen_table }}`

Просмотреть файл

@ -139,6 +139,12 @@ class SqlTest(pytest.Item, pytest.File):
# rewrite all udfs as temporary
query = parse_routine.sub_local_routines(query, project_dir)
# if we're reading an initialization function, ensure that we're not
# using a partition filter since we rely on `select * from {table}`
query = query.replace(
"require_partition_filter = TRUE", "require_partition_filter = FALSE"
)
dataset_id = "_".join(self.fspath.strpath.split(os.path.sep)[-3:])
if "CIRCLE_BUILD_NUM" in os.environ:
dataset_id += f"_{os.environ['CIRCLE_BUILD_NUM']}"

Просмотреть файл

@ -1,6 +1,6 @@
-- Generated via bigquery_etl.glean_usage
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_fenix.baseline_clients_daily`
`org_mozilla_fenix.baseline_clients_daily`
AS
SELECT
*

Просмотреть файл

@ -1,6 +1,6 @@
-- Generated via bigquery_etl.glean_usage
CREATE OR REPLACE VIEW
`moz-fx-data-shared-prod.org_mozilla_fenix.baseline_clients_last_seen`
`org_mozilla_fenix.baseline_clients_last_seen`
AS
SELECT
`moz-fx-data-shared-prod`.udf.pos_of_trailing_set_bit(days_seen_bits) AS days_since_seen,

Просмотреть файл

@ -1,6 +1,6 @@
-- Generated via bigquery_etl.glean_usage
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.baseline_clients_daily_v1`
`org_mozilla_fenix_derived.baseline_clients_daily_v1`
PARTITION BY
submission_date
CLUSTER BY
@ -33,7 +33,7 @@ WITH base AS (
normalized_os,
normalized_os_version,
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.baseline_v1`
`org_mozilla_fenix_stable.baseline_v1`
-- Baseline pings with 'foreground' reason were first introduced in early April 2020;
-- we initially excluded them from baseline_clients_daily so that we could measure
-- effects on KPIs. On 2020-08-25, we removed the filter on reason and backfilled. See:

Просмотреть файл

@ -23,7 +23,7 @@ WITH base AS (
normalized_os,
normalized_os_version,
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_stable.baseline_v1`
`org_mozilla_fenix_stable.baseline_v1`
-- Baseline pings with 'foreground' reason were first introduced in early April 2020;
-- we initially excluded them from baseline_clients_daily so that we could measure
-- effects on KPIs. On 2020-08-25, we removed the filter on reason and backfilled. See:

Просмотреть файл

@ -1,6 +1,6 @@
-- Generated via bigquery_etl.glean_usage
CREATE TABLE IF NOT EXISTS
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.baseline_clients_last_seen_v1`
`org_mozilla_fenix_derived.baseline_clients_last_seen_v1`
PARTITION BY
submission_date
CLUSTER BY
@ -18,7 +18,7 @@ SELECT
-- the two tables to validate.
*
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.baseline_clients_daily_v1`
`org_mozilla_fenix_derived.baseline_clients_daily_v1`
WHERE
-- Output empty table and read no input rows
FALSE

Просмотреть файл

@ -10,7 +10,7 @@ WITH _current AS (
) AS days_created_profile_bits,
* EXCEPT (submission_date)
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.baseline_clients_daily_v1`
`org_mozilla_fenix_derived.baseline_clients_daily_v1`
WHERE
submission_date = @submission_date
),
@ -19,7 +19,7 @@ _previous AS (
SELECT
* EXCEPT (submission_date)
FROM
`moz-fx-data-shared-prod.org_mozilla_fenix_derived.baseline_clients_last_seen_v1`
`org_mozilla_fenix_derived.baseline_clients_last_seen_v1`
WHERE
submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY)
-- Filter out rows from yesterday that have now fallen outside the 28-day window.

Просмотреть файл

@ -0,0 +1,13 @@
---
- submission_date: 2021-04-01
client_id: client-1
sample_id: 0
first_run_date: 2021-03-01
durations: 60
app_channel: release
- submission_date: 2021-04-01
client_id: client-2
sample_id: 0
first_run_date: 2021-03-01
durations: 120
app_channel: not-release

Просмотреть файл

@ -0,0 +1,682 @@
[
{
"description": "A JSON string containing any payload properties not present in the schema",
"type": "STRING",
"name": "additional_properties",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "The optional Android specific SDK version of the software running on this hardware device.",
"type": "STRING",
"name": "android_sdk_version",
"mode": "NULLABLE"
},
{
"description": "The build identifier generated by the CI system (e.g. \"1234/A\"). For language bindings that provide automatic detection for this value, (e.g. Android/Kotlin), in the unlikely event that the build identifier can not be retrieved from the OS, it is set to \"inaccessible\". For other language bindings, if the value was not provided through configuration, this metric gets set to `Unknown`.",
"type": "STRING",
"name": "app_build",
"mode": "NULLABLE"
},
{
"description": "The channel the application is being distributed on.",
"type": "STRING",
"name": "app_channel",
"mode": "NULLABLE"
},
{
"description": "The user visible version string (e.g. \"1.0.3\"). In the unlikely event that the display version can not be retrieved, it is set to \"inaccessible\".",
"type": "STRING",
"name": "app_display_version",
"mode": "NULLABLE"
},
{
"description": "The architecture of the device, (e.g. \"arm\", \"x86\").",
"type": "STRING",
"name": "architecture",
"mode": "NULLABLE"
},
{
"description": "A UUID uniquely identifying the client.",
"type": "STRING",
"name": "client_id",
"mode": "NULLABLE"
},
{
"description": "The manufacturer of the device the application is running on. Not set if the device manufacturer can't be determined (e.g. on Desktop).",
"type": "STRING",
"name": "device_manufacturer",
"mode": "NULLABLE"
},
{
"description": "The model of the device the application is running on. On Android, this is Build.MODEL, the user-visible marketing name, like \"Pixel 2 XL\". Not set if the device model can't be determined (e.g. on Desktop).",
"type": "STRING",
"name": "device_model",
"mode": "NULLABLE"
},
{
"description": "The date of the first run of the application.",
"type": "STRING",
"name": "first_run_date",
"mode": "NULLABLE"
},
{
"description": "The name of the operating system. Possible values: Android, iOS, Linux, Darwin, Windows, FreeBSD, NetBSD, OpenBSD, Solaris, unknown",
"type": "STRING",
"name": "os",
"mode": "NULLABLE"
},
{
"description": "The user-visible version of the operating system (e.g. \"1.2.3\"). If the version detection fails, this metric gets set to `Unknown`.",
"type": "STRING",
"name": "os_version",
"mode": "NULLABLE"
},
{
"description": "The version of the Glean SDK",
"type": "STRING",
"name": "telemetry_sdk_build",
"mode": "NULLABLE"
},
{
"description": "The locale of the application during initialization (e.g. \"es-ES\"). If the locale can't be determined on the system, the value is [\"und\"](https://unicode.org/reports/tr35/#Unknown_or_Invalid_Identifiers), to indicate \"undetermined\".",
"type": "STRING",
"name": "locale",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "client_info",
"mode": "NULLABLE"
},
{
"description": "The document ID specified in the URI when the client sent this message",
"type": "STRING",
"name": "document_id",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "category",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "extra",
"mode": "REPEATED"
},
{
"type": "STRING",
"name": "name",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "timestamp",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "events",
"mode": "REPEATED"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "city",
"mode": "NULLABLE"
},
{
"description": "An ISO 3166-1 alpha-2 country code",
"type": "STRING",
"name": "country",
"mode": "NULLABLE"
},
{
"description": "The specific geo database version used for this lookup",
"type": "STRING",
"name": "db_version",
"mode": "NULLABLE"
},
{
"description": "First major country subdivision, typically a state, province, or county",
"type": "STRING",
"name": "subdivision1",
"mode": "NULLABLE"
},
{
"description": "Second major country subdivision; not applicable for most countries",
"type": "STRING",
"name": "subdivision2",
"mode": "NULLABLE"
}
],
"description": "Results of a geographic lookup based on the client's IP address",
"type": "RECORD",
"name": "geo",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "Date HTTP header",
"type": "STRING",
"name": "date",
"mode": "NULLABLE"
},
{
"description": "DNT (Do Not Track) HTTP header",
"type": "STRING",
"name": "dnt",
"mode": "NULLABLE"
},
{
"description": "X-Debug-Id HTTP header",
"type": "STRING",
"name": "x_debug_id",
"mode": "NULLABLE"
},
{
"description": "X-PingSender-Version HTTP header",
"type": "STRING",
"name": "x_pingsender_version",
"mode": "NULLABLE"
},
{
"description": "X-Source-Tags HTTP header",
"type": "STRING",
"name": "x_source_tags",
"mode": "NULLABLE"
}
],
"description": "Headers included in the client's HTTP request",
"type": "RECORD",
"name": "header",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "browser",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "os",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "version",
"mode": "NULLABLE"
}
],
"description": "Parsed components of the client's user agent string",
"type": "RECORD",
"name": "user_agent",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "The specific geo ISP database version used for this lookup",
"type": "STRING",
"name": "db_version",
"mode": "NULLABLE"
},
{
"description": "The name of the ISP associated with the client's IP address",
"type": "STRING",
"name": "name",
"mode": "NULLABLE"
},
{
"description": "The name of a specific business entity associated with the client's IP address when available; otherwise the ISP name",
"type": "STRING",
"name": "organization",
"mode": "NULLABLE"
}
],
"description": "Results of ISP lookup based on the client's IP address",
"type": "RECORD",
"name": "isp",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "metadata",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"description": "The locale of the application during initialization (e.g. \"es-ES\").\nIf the locale can't be determined on the system, the value is\n[\"und\"](https://unicode.org/reports/tr35/#Unknown_or_Invalid_Identifiers),\nto indicate \"undetermined\".\n",
"type": "STRING",
"name": "glean_baseline_locale",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "string",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "time_unit",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "The duration of the last foreground session.\n",
"type": "RECORD",
"name": "glean_baseline_duration",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "timespan",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "A counter of URIs visited by the user in the current session, including\npage reloads. This does not include background page requests and URIs from\nembedded pages or private browsing but may be incremented without user\ninteraction by website scripts that programmatically redirect to a new\nlocation.\n",
"type": "INTEGER",
"name": "events_total_uri_count",
"mode": "NULLABLE"
},
{
"description": "The number of metrics pings sent during the lifetime of this baseline ping.",
"type": "INTEGER",
"name": "glean_validation_metrics_ping_count",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "counter",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set with an invalid label.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_label",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set to an invalid value.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_value",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "The labels for this counter are `<search-engine-name>.<source>`.\n\nIf the search engine is bundled with Fenix `search-engine-name` will be\nthe name of the search engine. If it's a custom search engine (defined:\nhttps://github.com/mozilla-mobile/fenix/issues/1607) the value will be\n`custom`.\n\n`source` will be: `action`, `suggestion`, `widget`, `shortcut`, `topsite`\n(depending on the source from which the search started). Also added the\n`other` option for the source but it should never enter on this case.\n",
"type": "RECORD",
"name": "metrics_search_count",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a timing metric was used incorrectly.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_state",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set to a value that overflowed.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_overflow",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records clicks of adverts on SERP pages.\nThe key format is <provider-name>.\n",
"type": "RECORD",
"name": "browser_search_ad_clicks",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records the type of interaction a user has on SERP pages.\n",
"type": "RECORD",
"name": "browser_search_in_content",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records counts of SERP pages with adverts displayed.\nThe key format is <provider-name>.\n",
"type": "RECORD",
"name": "browser_search_with_ads",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "A count of the pings submitted, by ping type.\n\nThis metric appears in both the metrics and baseline pings.\n\n- On the metrics ping, the counts include the number of pings sent since\n the last metrics ping (including the last metrics ping)\n- On the baseline ping, the counts include the number of pings sent since\n the last baseline ping (including the last baseline ping)\n",
"type": "RECORD",
"name": "glean_validation_pings_submitted",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "labeled_counter",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "jwe",
"mode": "REPEATED"
},
{
"fields": [
{
"description": "The hour of the first run of the application.\n",
"type": "STRING",
"name": "glean_validation_first_run_hour",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "datetime",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "INTEGER",
"name": "denominator",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "numerator",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "labeled_rate",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "metrics",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized app name",
"type": "STRING",
"name": "normalized_app_name",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized channel name",
"type": "STRING",
"name": "normalized_channel",
"mode": "NULLABLE"
},
{
"description": "An ISO 3166-1 alpha-2 country code",
"type": "STRING",
"name": "normalized_country_code",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized OS name",
"type": "STRING",
"name": "normalized_os",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "normalized_os_version",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "end_time",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "branch",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "type",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "extra",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "experiments",
"mode": "REPEATED"
},
{
"type": "STRING",
"name": "ping_type",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "seq",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "start_time",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "reason",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "ping_info",
"mode": "NULLABLE"
},
{
"description": "Hashed version of client_id (if present) useful for partitioning; ranges from 0 to 99",
"type": "INTEGER",
"name": "sample_id",
"mode": "NULLABLE"
},
{
"description": "Time when the ingestion edge server accepted this message",
"type": "TIMESTAMP",
"name": "submission_timestamp",
"mode": "NULLABLE"
}
]

Просмотреть файл

@ -0,0 +1,29 @@
---
# simple case
- &ping
submission_timestamp: 2021-04-01T00:00:00
sample_id: 0
client_info: &client_info
client_id: client-1
first_run_date: 2021-03-01T00:00:00
# Note: most of the metadata fields can be left out; we include the channel
# because it is used to test the window function.
app_channel: release
ping_info:
end_time: 2021-04-01T01:00:00
metrics:
timespan:
glean_baseline_duration:
time_unit: second
value: 60
# multiple pings
- <<: *ping
client_info:
<<: *client_info
client_id: client-2
# the channel changes
- <<: *ping
client_info:
<<: *client_info
client_id: client-2
app_channel: not-release

Просмотреть файл

@ -0,0 +1,4 @@
---
- name: submission_date
type: DATE
value: 2021-04-01

Просмотреть файл

@ -0,0 +1,19 @@
---
- submission_date: 2021-04-01
client_id: client-1
sample_id: 0
first_run_date: 2021-03-01
durations: 60
app_channel: release
- submission_date: 2021-04-01
client_id: client-2
sample_id: 0
first_run_date: 2021-03-01
durations: 120
app_channel: not-release
- submission_date: 2021-04-02
client_id: client-3
sample_id: 0
first_run_date: 2021-03-01
durations: 60
app_channel: release

Просмотреть файл

@ -0,0 +1,682 @@
[
{
"description": "A JSON string containing any payload properties not present in the schema",
"type": "STRING",
"name": "additional_properties",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "The optional Android specific SDK version of the software running on this hardware device.",
"type": "STRING",
"name": "android_sdk_version",
"mode": "NULLABLE"
},
{
"description": "The build identifier generated by the CI system (e.g. \"1234/A\"). For language bindings that provide automatic detection for this value, (e.g. Android/Kotlin), in the unlikely event that the build identifier can not be retrieved from the OS, it is set to \"inaccessible\". For other language bindings, if the value was not provided through configuration, this metric gets set to `Unknown`.",
"type": "STRING",
"name": "app_build",
"mode": "NULLABLE"
},
{
"description": "The channel the application is being distributed on.",
"type": "STRING",
"name": "app_channel",
"mode": "NULLABLE"
},
{
"description": "The user visible version string (e.g. \"1.0.3\"). In the unlikely event that the display version can not be retrieved, it is set to \"inaccessible\".",
"type": "STRING",
"name": "app_display_version",
"mode": "NULLABLE"
},
{
"description": "The architecture of the device, (e.g. \"arm\", \"x86\").",
"type": "STRING",
"name": "architecture",
"mode": "NULLABLE"
},
{
"description": "A UUID uniquely identifying the client.",
"type": "STRING",
"name": "client_id",
"mode": "NULLABLE"
},
{
"description": "The manufacturer of the device the application is running on. Not set if the device manufacturer can't be determined (e.g. on Desktop).",
"type": "STRING",
"name": "device_manufacturer",
"mode": "NULLABLE"
},
{
"description": "The model of the device the application is running on. On Android, this is Build.MODEL, the user-visible marketing name, like \"Pixel 2 XL\". Not set if the device model can't be determined (e.g. on Desktop).",
"type": "STRING",
"name": "device_model",
"mode": "NULLABLE"
},
{
"description": "The date of the first run of the application.",
"type": "STRING",
"name": "first_run_date",
"mode": "NULLABLE"
},
{
"description": "The name of the operating system. Possible values: Android, iOS, Linux, Darwin, Windows, FreeBSD, NetBSD, OpenBSD, Solaris, unknown",
"type": "STRING",
"name": "os",
"mode": "NULLABLE"
},
{
"description": "The user-visible version of the operating system (e.g. \"1.2.3\"). If the version detection fails, this metric gets set to `Unknown`.",
"type": "STRING",
"name": "os_version",
"mode": "NULLABLE"
},
{
"description": "The version of the Glean SDK",
"type": "STRING",
"name": "telemetry_sdk_build",
"mode": "NULLABLE"
},
{
"description": "The locale of the application during initialization (e.g. \"es-ES\"). If the locale can't be determined on the system, the value is [\"und\"](https://unicode.org/reports/tr35/#Unknown_or_Invalid_Identifiers), to indicate \"undetermined\".",
"type": "STRING",
"name": "locale",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "client_info",
"mode": "NULLABLE"
},
{
"description": "The document ID specified in the URI when the client sent this message",
"type": "STRING",
"name": "document_id",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "category",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "extra",
"mode": "REPEATED"
},
{
"type": "STRING",
"name": "name",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "timestamp",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "events",
"mode": "REPEATED"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "city",
"mode": "NULLABLE"
},
{
"description": "An ISO 3166-1 alpha-2 country code",
"type": "STRING",
"name": "country",
"mode": "NULLABLE"
},
{
"description": "The specific geo database version used for this lookup",
"type": "STRING",
"name": "db_version",
"mode": "NULLABLE"
},
{
"description": "First major country subdivision, typically a state, province, or county",
"type": "STRING",
"name": "subdivision1",
"mode": "NULLABLE"
},
{
"description": "Second major country subdivision; not applicable for most countries",
"type": "STRING",
"name": "subdivision2",
"mode": "NULLABLE"
}
],
"description": "Results of a geographic lookup based on the client's IP address",
"type": "RECORD",
"name": "geo",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "Date HTTP header",
"type": "STRING",
"name": "date",
"mode": "NULLABLE"
},
{
"description": "DNT (Do Not Track) HTTP header",
"type": "STRING",
"name": "dnt",
"mode": "NULLABLE"
},
{
"description": "X-Debug-Id HTTP header",
"type": "STRING",
"name": "x_debug_id",
"mode": "NULLABLE"
},
{
"description": "X-PingSender-Version HTTP header",
"type": "STRING",
"name": "x_pingsender_version",
"mode": "NULLABLE"
},
{
"description": "X-Source-Tags HTTP header",
"type": "STRING",
"name": "x_source_tags",
"mode": "NULLABLE"
}
],
"description": "Headers included in the client's HTTP request",
"type": "RECORD",
"name": "header",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "browser",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "os",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "version",
"mode": "NULLABLE"
}
],
"description": "Parsed components of the client's user agent string",
"type": "RECORD",
"name": "user_agent",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "The specific geo ISP database version used for this lookup",
"type": "STRING",
"name": "db_version",
"mode": "NULLABLE"
},
{
"description": "The name of the ISP associated with the client's IP address",
"type": "STRING",
"name": "name",
"mode": "NULLABLE"
},
{
"description": "The name of a specific business entity associated with the client's IP address when available; otherwise the ISP name",
"type": "STRING",
"name": "organization",
"mode": "NULLABLE"
}
],
"description": "Results of ISP lookup based on the client's IP address",
"type": "RECORD",
"name": "isp",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "metadata",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"description": "The locale of the application during initialization (e.g. \"es-ES\").\nIf the locale can't be determined on the system, the value is\n[\"und\"](https://unicode.org/reports/tr35/#Unknown_or_Invalid_Identifiers),\nto indicate \"undetermined\".\n",
"type": "STRING",
"name": "glean_baseline_locale",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "string",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "time_unit",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "The duration of the last foreground session.\n",
"type": "RECORD",
"name": "glean_baseline_duration",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "timespan",
"mode": "NULLABLE"
},
{
"fields": [
{
"description": "A counter of URIs visited by the user in the current session, including\npage reloads. This does not include background page requests and URIs from\nembedded pages or private browsing but may be incremented without user\ninteraction by website scripts that programmatically redirect to a new\nlocation.\n",
"type": "INTEGER",
"name": "events_total_uri_count",
"mode": "NULLABLE"
},
{
"description": "The number of metrics pings sent during the lifetime of this baseline ping.",
"type": "INTEGER",
"name": "glean_validation_metrics_ping_count",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "counter",
"mode": "NULLABLE"
},
{
"fields": [
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set with an invalid label.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_label",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set to an invalid value.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_value",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "The labels for this counter are `<search-engine-name>.<source>`.\n\nIf the search engine is bundled with Fenix `search-engine-name` will be\nthe name of the search engine. If it's a custom search engine (defined:\nhttps://github.com/mozilla-mobile/fenix/issues/1607) the value will be\n`custom`.\n\n`source` will be: `action`, `suggestion`, `widget`, `shortcut`, `topsite`\n(depending on the source from which the search started). Also added the\n`other` option for the source but it should never enter on this case.\n",
"type": "RECORD",
"name": "metrics_search_count",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a timing metric was used incorrectly.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_state",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Counts the number of times a metric was set to a value that overflowed.\nThe labels are the `category.name` identifier of the metric.\n",
"type": "RECORD",
"name": "glean_error_invalid_overflow",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records clicks of adverts on SERP pages.\nThe key format is <provider-name>.\n",
"type": "RECORD",
"name": "browser_search_ad_clicks",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records the type of interaction a user has on SERP pages.\n",
"type": "RECORD",
"name": "browser_search_in_content",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "Records counts of SERP pages with adverts displayed.\nThe key format is <provider-name>.\n",
"type": "RECORD",
"name": "browser_search_with_ads",
"mode": "REPEATED"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "value",
"mode": "NULLABLE"
}
],
"description": "A count of the pings submitted, by ping type.\n\nThis metric appears in both the metrics and baseline pings.\n\n- On the metrics ping, the counts include the number of pings sent since\n the last metrics ping (including the last metrics ping)\n- On the baseline ping, the counts include the number of pings sent since\n the last baseline ping (including the last baseline ping)\n",
"type": "RECORD",
"name": "glean_validation_pings_submitted",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "labeled_counter",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "jwe",
"mode": "REPEATED"
},
{
"fields": [
{
"description": "The hour of the first run of the application.\n",
"type": "STRING",
"name": "glean_validation_first_run_hour",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "datetime",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "INTEGER",
"name": "denominator",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "numerator",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "labeled_rate",
"mode": "REPEATED"
}
],
"type": "RECORD",
"name": "metrics",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized app name",
"type": "STRING",
"name": "normalized_app_name",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized channel name",
"type": "STRING",
"name": "normalized_channel",
"mode": "NULLABLE"
},
{
"description": "An ISO 3166-1 alpha-2 country code",
"type": "STRING",
"name": "normalized_country_code",
"mode": "NULLABLE"
},
{
"description": "Set to \"Other\" if this message contained an unrecognized OS name",
"type": "STRING",
"name": "normalized_os",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "normalized_os_version",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "end_time",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "key",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "branch",
"mode": "NULLABLE"
},
{
"fields": [
{
"type": "STRING",
"name": "type",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "extra",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "value",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "experiments",
"mode": "REPEATED"
},
{
"type": "STRING",
"name": "ping_type",
"mode": "NULLABLE"
},
{
"type": "INTEGER",
"name": "seq",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "start_time",
"mode": "NULLABLE"
},
{
"type": "STRING",
"name": "reason",
"mode": "NULLABLE"
}
],
"type": "RECORD",
"name": "ping_info",
"mode": "NULLABLE"
},
{
"description": "Hashed version of client_id (if present) useful for partitioning; ranges from 0 to 99",
"type": "INTEGER",
"name": "sample_id",
"mode": "NULLABLE"
},
{
"description": "Time when the ingestion edge server accepted this message",
"type": "TIMESTAMP",
"name": "submission_timestamp",
"mode": "NULLABLE"
}
]

Просмотреть файл

@ -0,0 +1,35 @@
---
# simple case
- &ping
submission_timestamp: 2021-04-01T00:00:00
sample_id: 0
client_info: &client_info
client_id: client-1
first_run_date: 2021-03-01T00:00:00
# Note: most of the metadata fields can be left out; we include the
# channel so that the window function can be tested.
app_channel: release
ping_info:
end_time: 2021-04-01T01:00:00
metrics:
timespan:
glean_baseline_duration:
time_unit: second
value: 60
# multiple pings
- <<: *ping
client_info:
<<: *client_info
client_id: client-2
# the channel changes
- <<: *ping
client_info:
<<: *client_info
client_id: client-2
app_channel: not-release
# on another date, to test init
- <<: *ping
submission_timestamp: 2021-04-02T00:00:00
client_info:
<<: *client_info
client_id: client-3

Просмотреть файл

@ -0,0 +1,4 @@
---
- name: submission_date
type: DATE
value: 2021-04-01