* Fix egregious double counting in scalar bucket counts * Update for newer version of black * Update scalar bucket count test to account for combinations * Update minimal test for histogram bucket counts * Add test for multiple clients in histogram aggregates * Remove deduplicated cte in histogram bucket counts * Use count distinct for client counts to be explicit
This commit is contained in:
Родитель
2af5515382
Коммит
ce9fe86ed2
|
@ -104,12 +104,7 @@ def scalar_bucket_counts(**kwargs):
|
|||
def histogram_bucket_counts(**kwargs):
|
||||
"""Variables for clients histogram bucket counts."""
|
||||
attributes_list = ["ping_type", "os", "app_version", "app_build_id", "channel"]
|
||||
metric_attributes_list = [
|
||||
"metric",
|
||||
"metric_type",
|
||||
"key",
|
||||
"agg_type",
|
||||
]
|
||||
metric_attributes_list = ["metric", "metric_type", "key", "agg_type"]
|
||||
fixed_attributes = ["app_version", "channel"]
|
||||
cubed_attributes = [x for x in attributes_list if x not in fixed_attributes]
|
||||
return dict(
|
||||
|
|
|
@ -1,9 +1,6 @@
|
|||
{{ header }}
|
||||
{% from 'macros.sql' import enumerate_table_combinations %}
|
||||
|
||||
{# TODO: remove this import by factoring it out as a proper udf #}
|
||||
{% include "clients_histogram_aggregates_v1.udf.sql" %}
|
||||
|
||||
WITH
|
||||
{{
|
||||
enumerate_table_combinations(
|
||||
|
@ -13,20 +10,6 @@ WITH
|
|||
attribute_combinations
|
||||
)
|
||||
}},
|
||||
-- Ensure there is a single record per client id
|
||||
deduplicated_combos AS (
|
||||
SELECT
|
||||
client_id,
|
||||
{{ attributes }},
|
||||
udf_merged_user_data(
|
||||
ARRAY_CONCAT_AGG(histogram_aggregates)
|
||||
) AS histogram_aggregates
|
||||
FROM
|
||||
all_combos
|
||||
GROUP BY
|
||||
client_id,
|
||||
{{ attributes }}
|
||||
),
|
||||
normalized_histograms AS (
|
||||
SELECT
|
||||
{{ attributes }},
|
||||
|
@ -37,7 +20,7 @@ normalized_histograms AS (
|
|||
FROM unnest(histogram_aggregates)
|
||||
)AS histogram_aggregates
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
),
|
||||
unnested AS (
|
||||
SELECT
|
||||
|
|
|
@ -2,9 +2,6 @@
|
|||
{% include "scalar_bucket_counts_v1.udf.sql" %}
|
||||
{% from 'macros.sql' import enumerate_table_combinations %}
|
||||
|
||||
{# TODO: remove this import by factoring it out as a proper udf #}
|
||||
{% include "clients_scalar_aggregates_v1.udf.sql" %}
|
||||
|
||||
WITH
|
||||
{{
|
||||
enumerate_table_combinations(
|
||||
|
@ -14,20 +11,6 @@ WITH
|
|||
attribute_combinations
|
||||
)
|
||||
}},
|
||||
-- Ensure there is a single record per client id
|
||||
deduplicated_combos AS (
|
||||
SELECT
|
||||
client_id,
|
||||
{{ attributes }},
|
||||
udf_merged_user_data(
|
||||
ARRAY_CONCAT_AGG(scalar_aggregates)
|
||||
) AS scalar_aggregates
|
||||
FROM
|
||||
all_combos
|
||||
GROUP BY
|
||||
client_id,
|
||||
{{ attributes }}
|
||||
),
|
||||
bucketed_booleans AS (
|
||||
SELECT
|
||||
client_id,
|
||||
|
@ -37,17 +20,17 @@ bucketed_booleans AS (
|
|||
NULL AS bucket_count,
|
||||
udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates,
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
),
|
||||
log_min_max AS (
|
||||
SELECT
|
||||
metric,
|
||||
key,
|
||||
LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) range_min,
|
||||
LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) range_max,
|
||||
LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) as range_min,
|
||||
LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) as range_max,
|
||||
100 as bucket_count
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
CROSS JOIN UNNEST(scalar_aggregates)
|
||||
WHERE
|
||||
metric_type <> "boolean"
|
||||
|
@ -76,7 +59,7 @@ bucketed_scalars AS (
|
|||
FORMAT("%.*f", 2, mozfun.glam.histogram_bucket_from_value(buckets, value) + 0.0001)
|
||||
AS STRING) AS bucket
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
CROSS JOIN UNNEST(scalar_aggregates)
|
||||
LEFT JOIN buckets_by_metric
|
||||
USING(metric, key)
|
||||
|
@ -119,7 +102,8 @@ SELECT
|
|||
range_max,
|
||||
bucket_count,
|
||||
bucket,
|
||||
COUNT(*) AS count
|
||||
-- COUNT(*) would also work because there is one row per client and bucket; DISTINCT makes the client count explicit
|
||||
COUNT(DISTINCT client_id) AS count
|
||||
FROM
|
||||
booleans_and_scalars
|
||||
GROUP BY
|
||||
|
|
|
@ -1,43 +1,4 @@
|
|||
-- query for org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1;
|
||||
CREATE TEMP FUNCTION udf_merged_user_data(aggs ANY TYPE)
|
||||
RETURNS ARRAY<
|
||||
STRUCT<
|
||||
metric STRING,
|
||||
metric_type STRING,
|
||||
key STRING,
|
||||
agg_type STRING,
|
||||
value ARRAY<STRUCT<key STRING, value INT64>>
|
||||
>
|
||||
> AS (
|
||||
(
|
||||
WITH unnested AS (
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
UNNEST(aggs)
|
||||
),
|
||||
aggregated_data AS (
|
||||
SELECT AS STRUCT
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
agg_type,
|
||||
mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
|
||||
FROM
|
||||
unnested
|
||||
GROUP BY
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
agg_type
|
||||
)
|
||||
SELECT
|
||||
ARRAY_AGG((metric, metric_type, key, agg_type, value))
|
||||
FROM
|
||||
aggregated_data
|
||||
)
|
||||
);
|
||||
|
||||
WITH
|
||||
-- Cross join with the attribute combinations to reduce the query complexity
|
||||
-- with respect to the number of operations. A table with n rows cross joined
|
||||
|
@ -72,26 +33,6 @@ all_combos AS (
|
|||
CROSS JOIN
|
||||
static_combos combo
|
||||
),
|
||||
-- Ensure there is a single record per client id
|
||||
deduplicated_combos AS (
|
||||
SELECT
|
||||
client_id,
|
||||
ping_type,
|
||||
os,
|
||||
app_version,
|
||||
app_build_id,
|
||||
channel,
|
||||
udf_merged_user_data(ARRAY_CONCAT_AGG(histogram_aggregates)) AS histogram_aggregates
|
||||
FROM
|
||||
all_combos
|
||||
GROUP BY
|
||||
client_id,
|
||||
ping_type,
|
||||
os,
|
||||
app_version,
|
||||
app_build_id,
|
||||
channel
|
||||
),
|
||||
normalized_histograms AS (
|
||||
SELECT
|
||||
ping_type,
|
||||
|
@ -110,7 +51,7 @@ normalized_histograms AS (
|
|||
UNNEST(histogram_aggregates)
|
||||
) AS histogram_aggregates
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
),
|
||||
unnested AS (
|
||||
SELECT
|
||||
|
|
|
@ -89,92 +89,6 @@ RETURNS ARRAY<
|
|||
)
|
||||
);
|
||||
|
||||
CREATE TEMP FUNCTION udf_merged_user_data(
|
||||
aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>
|
||||
)
|
||||
RETURNS ARRAY<
|
||||
STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
|
||||
> AS (
|
||||
(
|
||||
WITH unnested AS (
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
UNNEST(aggs)
|
||||
WHERE
|
||||
agg_type != "avg"
|
||||
),
|
||||
aggregated AS (
|
||||
SELECT
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
agg_type,
|
||||
--format:off
|
||||
CASE agg_type
|
||||
WHEN 'max' THEN max(value)
|
||||
WHEN 'min' THEN min(value)
|
||||
WHEN 'count' THEN sum(value)
|
||||
WHEN 'sum' THEN sum(value)
|
||||
WHEN 'false' THEN sum(value)
|
||||
WHEN 'true' THEN sum(value)
|
||||
END AS value
|
||||
--format:on
|
||||
FROM
|
||||
unnested
|
||||
WHERE
|
||||
value IS NOT NULL
|
||||
GROUP BY
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
agg_type
|
||||
),
|
||||
scalar_count_and_sum AS (
|
||||
SELECT
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
'avg' AS agg_type,
|
||||
--format:off
|
||||
CASE WHEN agg_type = 'count' THEN value ELSE 0 END AS count,
|
||||
CASE WHEN agg_type = 'sum' THEN value ELSE 0 END AS sum
|
||||
--format:on
|
||||
FROM
|
||||
aggregated
|
||||
WHERE
|
||||
agg_type IN ('sum', 'count')
|
||||
),
|
||||
scalar_averages AS (
|
||||
SELECT
|
||||
* EXCEPT (count, sum),
|
||||
SUM(sum) / SUM(count) AS agg_value
|
||||
FROM
|
||||
scalar_count_and_sum
|
||||
GROUP BY
|
||||
metric,
|
||||
metric_type,
|
||||
key,
|
||||
agg_type
|
||||
),
|
||||
merged_data AS (
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
aggregated
|
||||
UNION ALL
|
||||
SELECT
|
||||
*
|
||||
FROM
|
||||
scalar_averages
|
||||
)
|
||||
SELECT
|
||||
ARRAY_AGG((metric, metric_type, key, agg_type, value))
|
||||
FROM
|
||||
merged_data
|
||||
)
|
||||
);
|
||||
|
||||
WITH
|
||||
-- Cross join with the attribute combinations to reduce the query complexity
|
||||
-- with respect to the number of operations. A table with n rows cross joined
|
||||
|
@ -209,26 +123,6 @@ all_combos AS (
|
|||
CROSS JOIN
|
||||
static_combos combo
|
||||
),
|
||||
-- Ensure there is a single record per client id
|
||||
deduplicated_combos AS (
|
||||
SELECT
|
||||
client_id,
|
||||
ping_type,
|
||||
os,
|
||||
app_version,
|
||||
app_build_id,
|
||||
channel,
|
||||
udf_merged_user_data(ARRAY_CONCAT_AGG(scalar_aggregates)) AS scalar_aggregates
|
||||
FROM
|
||||
all_combos
|
||||
GROUP BY
|
||||
client_id,
|
||||
ping_type,
|
||||
os,
|
||||
app_version,
|
||||
app_build_id,
|
||||
channel
|
||||
),
|
||||
bucketed_booleans AS (
|
||||
SELECT
|
||||
client_id,
|
||||
|
@ -242,17 +136,17 @@ bucketed_booleans AS (
|
|||
NULL AS bucket_count,
|
||||
udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates,
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
),
|
||||
log_min_max AS (
|
||||
SELECT
|
||||
metric,
|
||||
key,
|
||||
LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) range_min,
|
||||
LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) range_max,
|
||||
LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) AS range_min,
|
||||
LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) AS range_max,
|
||||
100 AS bucket_count
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
CROSS JOIN
|
||||
UNNEST(scalar_aggregates)
|
||||
WHERE
|
||||
|
@ -295,7 +189,7 @@ bucketed_scalars AS (
|
|||
FORMAT("%.*f", 2, mozfun.glam.histogram_bucket_from_value(buckets, value) + 0.0001) AS STRING
|
||||
) AS bucket
|
||||
FROM
|
||||
deduplicated_combos
|
||||
all_combos
|
||||
CROSS JOIN
|
||||
UNNEST(scalar_aggregates)
|
||||
LEFT JOIN
|
||||
|
@ -359,7 +253,8 @@ SELECT
|
|||
range_max,
|
||||
bucket_count,
|
||||
bucket,
|
||||
COUNT(*) AS count
|
||||
-- COUNT(*) would also work because there is one row per client and bucket; DISTINCT makes the client count explicit
|
||||
COUNT(DISTINCT client_id) AS count
|
||||
FROM
|
||||
booleans_and_scalars
|
||||
GROUP BY
|
||||
|
|
|
@ -20,6 +20,13 @@ python org_mozilla_fenix_glam_nightly__extract_user_counts_v1/test_minimal/data.
|
|||
pytest -k extract_user_counts
|
||||
```
|
||||
|
||||
The easiest way to generate SQL for testing is to run the following script from
|
||||
the project root.
|
||||
|
||||
```bash
|
||||
GENERATE_ONLY=true script/glam/test/test_glean_org_mozilla_fenix_glam_nightly
|
||||
```
|
||||
|
||||
## Creating a new test
|
||||
|
||||
To create a new test, copy the `test_minimal` directory and rename it to reflect
|
||||
|
|
|
@ -32,7 +32,7 @@ CLIENTS_HISTOGRAM_AGGREGATES = [
|
|||
{"key": "1", "value": 0},
|
||||
{"key": "2", "value": 1},
|
||||
],
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
@ -53,11 +53,8 @@ CLIENTS_DAILY_HISTOGRAM_AGGREGATES = [
|
|||
"metric_type": "timing_distribution",
|
||||
"key": "",
|
||||
"agg_type": "summed_histogram",
|
||||
"value": [
|
||||
{"key": "0", "value": 1},
|
||||
{"key": "1", "value": 0},
|
||||
],
|
||||
},
|
||||
"value": [{"key": "0", "value": 1}, {"key": "1", "value": 0}],
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
@ -82,7 +79,7 @@ EXPECT = [
|
|||
{"key": "1", "value": 0},
|
||||
{"key": "2", "value": 1},
|
||||
],
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
|
|
@ -46,7 +46,7 @@ CLIENTS_DAILY_HISTOGRAM_AGGREGATES = [
|
|||
{"key": "112863206", "value": 1},
|
||||
{"key": "123078199", "value": 0},
|
||||
],
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
@ -70,7 +70,7 @@ EXPECT = [
|
|||
{"key": "112863206", "value": 1},
|
||||
{"key": "123078199", "value": 0},
|
||||
],
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
|
|
@ -18,7 +18,7 @@ VIEW_USER_COUNTS = [
|
|||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
"total_users": 44444,
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
EXPECT = [
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
"""Testing data for query."""
|
||||
|
||||
from pathlib import Path
|
||||
from itertools import product
|
||||
|
||||
import yaml
|
||||
|
||||
|
@ -9,16 +10,19 @@ ROOT = Path(__file__).parent
|
|||
UUID = "df735f02-efe5-4b07-b212-583bb99ba241"
|
||||
SUBMISSION_DATE = "2020-10-01"
|
||||
APP_BUILD_ID = "2020100100"
|
||||
OS = "Android"
|
||||
PING_TYPE = "metrics"
|
||||
|
||||
# NOTE: what happens when channel = "*"?
|
||||
# See the scalar_bucket_counts minimal example for more details on the
|
||||
# preconditions.
|
||||
CLIENTS_HISTOGRAM_AGGREGATES = [
|
||||
{
|
||||
"sample_id": 1,
|
||||
"client_id": UUID,
|
||||
"ping_type": "*",
|
||||
"os": "*",
|
||||
"ping_type": PING_TYPE,
|
||||
"os": OS,
|
||||
"app_version": 84,
|
||||
"app_build_id": "*",
|
||||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
"histogram_aggregates": [
|
||||
{
|
||||
|
@ -27,41 +31,34 @@ CLIENTS_HISTOGRAM_AGGREGATES = [
|
|||
"key": "",
|
||||
"agg_type": "summed_histogram",
|
||||
"value": [
|
||||
{"key": "112863206", "value": 1},
|
||||
{"key": "123078199", "value": 0},
|
||||
{"key": "1", "value": 1},
|
||||
{"key": "2", "value": 0},
|
||||
],
|
||||
},
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
BASE_ROW = {
|
||||
"agg_type": "summed_histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"channel": "*",
|
||||
"key": "",
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 2,
|
||||
"record": {"key": "1", "value": 1.0},
|
||||
}
|
||||
|
||||
EXPECT = [
|
||||
{
|
||||
"agg_type": "summed_histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"channel": "*",
|
||||
"key": "",
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 123078199,
|
||||
"record": {"key": "112863206", "value": 1.0},
|
||||
},
|
||||
{
|
||||
"agg_type": "summed_histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"channel": "*",
|
||||
"key": "",
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 123078199,
|
||||
"record": {"key": "123078199", "value": 0.0},
|
||||
},
|
||||
{**BASE_ROW, **dict(zip(["record", "ping_type", "os", "app_build_id"], values))}
|
||||
for values in product(
|
||||
[{"key": "1", "value": 1.0}, {"key": "2", "value": 0.0}],
|
||||
*zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3),
|
||||
)
|
||||
]
|
||||
|
||||
prefix = "glam_etl"
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 123078199
|
||||
record:
|
||||
key: '112863206'
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: &id001
|
||||
key: '1'
|
||||
value: 1.0
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
|
@ -18,9 +18,163 @@
|
|||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 123078199
|
||||
record:
|
||||
key: '123078199'
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: &id002
|
||||
key: '2'
|
||||
value: 0.0
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 2
|
||||
record: *id002
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
- app_build_id: '*'
|
||||
- app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
client_id: df735f02-efe5-4b07-b212-583bb99ba241
|
||||
|
@ -8,10 +8,10 @@
|
|||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
value:
|
||||
- key: '112863206'
|
||||
- key: '1'
|
||||
value: 1
|
||||
- key: '123078199'
|
||||
- key: '2'
|
||||
value: 0
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
sample_id: 1
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
"""Testing data for query."""
|
||||
|
||||
from pathlib import Path
|
||||
from itertools import product
|
||||
from uuid import uuid4
|
||||
|
||||
import yaml
|
||||
|
||||
ROOT = Path(__file__).parent
|
||||
|
||||
SUBMISSION_DATE = "2020-10-01"
|
||||
APP_BUILD_ID = "2020100100"
|
||||
OS = "Android"
|
||||
PING_TYPE = "metrics"
|
||||
|
||||
|
||||
# NOTE: each client contributes a total of 1 to the final aggregate
|
||||
CLIENTS_HISTOGRAM_AGGREGATES = [
|
||||
{
|
||||
"sample_id": 1,
|
||||
"client_id": str(uuid4()),
|
||||
"ping_type": PING_TYPE,
|
||||
"os": OS,
|
||||
"app_version": 84,
|
||||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
"histogram_aggregates": [
|
||||
{
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"key": "",
|
||||
"agg_type": "summed_histogram",
|
||||
"value": [
|
||||
{"key": "1", "value": 1},
|
||||
{"key": "2", "value": 0},
|
||||
],
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
"sample_id": 1,
|
||||
"client_id": str(uuid4()),
|
||||
"ping_type": PING_TYPE,
|
||||
"os": OS,
|
||||
"app_version": 84,
|
||||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
"histogram_aggregates": [
|
||||
{
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"key": "",
|
||||
"agg_type": "summed_histogram",
|
||||
"value": [
|
||||
{"key": "1", "value": 1},
|
||||
{"key": "3", "value": 1},
|
||||
],
|
||||
}
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
BASE_ROW = {
|
||||
"agg_type": "summed_histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"channel": "*",
|
||||
"key": "",
|
||||
"metric": "network_tcp_connection",
|
||||
"metric_type": "timing_distribution",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 3,
|
||||
"record": {"key": "1", "value": 1.0},
|
||||
}
|
||||
|
||||
EXPECT = [
|
||||
{**BASE_ROW, **dict(zip(["record", "ping_type", "os", "app_build_id"], values))}
|
||||
for values in product(
|
||||
[
|
||||
{"key": "1", "value": 1.5},
|
||||
{"key": "2", "value": 0.0},
|
||||
{"key": "3", "value": 0.5},
|
||||
],
|
||||
*zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3),
|
||||
)
|
||||
]
|
||||
|
||||
prefix = "glam_etl"
|
||||
tables = [
|
||||
(
|
||||
f"{prefix}.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml",
|
||||
CLIENTS_HISTOGRAM_AGGREGATES,
|
||||
),
|
||||
("expect.yaml", EXPECT),
|
||||
]
|
||||
for name, data in tables:
|
||||
with (ROOT / name).open("w") as fp:
|
||||
yaml.dump(data, fp)
|
|
@ -0,0 +1,270 @@
|
|||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: &id001
|
||||
key: '1'
|
||||
value: 1.5
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id001
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: &id002
|
||||
key: '2'
|
||||
value: 0.0
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id002
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: &id003
|
||||
key: '3'
|
||||
value: 0.5
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id003
|
||||
- agg_type: summed_histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3
|
||||
record: *id003
|
|
@ -0,0 +1,47 @@
|
|||
- mode: NULLABLE
|
||||
name: sample_id
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: client_id
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: ping_type
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: os
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: app_version
|
||||
type: INTEGER
|
||||
- mode: NULLABLE
|
||||
name: app_build_id
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: channel
|
||||
type: STRING
|
||||
- fields:
|
||||
- mode: NULLABLE
|
||||
name: metric
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: metric_type
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: key
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: agg_type
|
||||
type: STRING
|
||||
- fields:
|
||||
- mode: NULLABLE
|
||||
name: key
|
||||
type: STRING
|
||||
- mode: NULLABLE
|
||||
name: value
|
||||
type: INTEGER
|
||||
mode: REPEATED
|
||||
name: value
|
||||
type: RECORD
|
||||
mode: REPEATED
|
||||
name: histogram_aggregates
|
||||
type: RECORD
|
|
@ -0,0 +1,34 @@
|
|||
- app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
client_id: 5c2364fc-46d4-4537-9575-a8ddaba2b42f
|
||||
histogram_aggregates:
|
||||
- agg_type: summed_histogram
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
value:
|
||||
- key: '1'
|
||||
value: 1
|
||||
- key: '2'
|
||||
value: 0
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
sample_id: 1
|
||||
- app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
client_id: 813d66e1-57dc-46aa-84ec-682b48cc2e3c
|
||||
histogram_aggregates:
|
||||
- agg_type: summed_histogram
|
||||
key: ''
|
||||
metric: network_tcp_connection
|
||||
metric_type: timing_distribution
|
||||
value:
|
||||
- key: '1'
|
||||
value: 1
|
||||
- key: '3'
|
||||
value: 1
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
sample_id: 1
|
|
@ -0,0 +1,12 @@
|
|||
- name: submission_date
|
||||
type: DATE
|
||||
value: '2020-10-01'
|
||||
- name: min_sample_id
|
||||
type: INT64
|
||||
value: 0
|
||||
- name: max_sample_id
|
||||
type: INT64
|
||||
value: 99
|
||||
- name: sample_size
|
||||
type: INT64
|
||||
value: 100
|
|
@ -1,6 +1,7 @@
|
|||
"""Testing data for query."""
|
||||
from pathlib import Path
|
||||
from uuid import uuid4
|
||||
from itertools import product
|
||||
|
||||
import yaml
|
||||
|
||||
|
@ -8,13 +9,18 @@ ROOT = Path(__file__).parent
|
|||
|
||||
SUBMISSION_DATE = "2020-10-01"
|
||||
APP_BUILD_ID = "2020100100"
|
||||
OS = "Android"
|
||||
PING_TYPE = "metrics"
|
||||
|
||||
# Other tests: non * fields
|
||||
# Testing precondition: ping_type, os, and app_build_id must not be "*". See
|
||||
# models.py under the scalar_bucket_counts parameters to see that sets fields
|
||||
# are used in the static combinations. If these are set to "*", then they will
|
||||
# be double counted...
|
||||
CLIENTS_SCALAR_AGGREGATES = [
|
||||
{
|
||||
"client_id": str(uuid4()),
|
||||
"ping_type": "*",
|
||||
"os": "*",
|
||||
"ping_type": PING_TYPE,
|
||||
"os": OS,
|
||||
"app_version": 84,
|
||||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
|
@ -30,8 +36,8 @@ CLIENTS_SCALAR_AGGREGATES = [
|
|||
},
|
||||
{
|
||||
"client_id": str(uuid4()),
|
||||
"ping_type": "*",
|
||||
"os": "*",
|
||||
"ping_type": PING_TYPE,
|
||||
"os": OS,
|
||||
"app_version": 84,
|
||||
"app_build_id": APP_BUILD_ID,
|
||||
"channel": "*",
|
||||
|
@ -47,108 +53,36 @@ CLIENTS_SCALAR_AGGREGATES = [
|
|||
},
|
||||
]
|
||||
|
||||
# TODO: why are the range_min and range_max set at these values?
|
||||
# we must generate the set of combinations. Each one of these have the same
|
||||
# values though.
|
||||
|
||||
BASE_ROW = {
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"bucket": "4.00",
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "count",
|
||||
"count": 1,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 3.0,
|
||||
"range_min": 2.0,
|
||||
}
|
||||
|
||||
# Didn't intend to code golf. This enumerates all of the "static combinations"
|
||||
# by taking the cross product of all values. Each of these can take on a value
|
||||
# from each of the clients above. Since each attribute combination has a single
|
||||
# client, we do not have to change the "count" in the base row.
|
||||
EXPECT = [
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"bucket": "16.00",
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "count",
|
||||
"count": 1,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"bucket": "32.00",
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "count",
|
||||
"count": 1,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "avg",
|
||||
"count": 2,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "2020100100",
|
||||
"app_version": 84,
|
||||
"bucket": "16.00",
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "count",
|
||||
"count": 1,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "2020100100",
|
||||
"app_version": 84,
|
||||
"bucket": "32.00",
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "count",
|
||||
"count": 1,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{
|
||||
"agg_type": "histogram",
|
||||
"app_build_id": "2020100100",
|
||||
"app_version": 84,
|
||||
"bucket_count": 100,
|
||||
"channel": "*",
|
||||
"client_agg_type": "avg",
|
||||
"count": 2,
|
||||
"key": "",
|
||||
"metric": "places_manager_write_query_count",
|
||||
"metric_type": "counter",
|
||||
"os": "*",
|
||||
"ping_type": "*",
|
||||
"range_max": 5.0,
|
||||
"range_min": 0.0,
|
||||
},
|
||||
{**BASE_ROW, **dict(zip(["bucket", "ping_type", "os", "app_build_id"], values))}
|
||||
for values in product(
|
||||
["4.00", "8.00"], *zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3)
|
||||
)
|
||||
]
|
||||
|
||||
prefix = "glam_etl"
|
||||
|
|
|
@ -1,51 +1,97 @@
|
|||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '16.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '32.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: avg
|
||||
count: 2
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '16.00'
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
|
@ -55,12 +101,12 @@
|
|||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '32.00'
|
||||
bucket: '4.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
|
@ -70,19 +116,125 @@
|
|||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: avg
|
||||
count: 2
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: metrics
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: Android
|
||||
ping_type: '*'
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 5.0
|
||||
range_min: 0.0
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
- agg_type: histogram
|
||||
app_build_id: '*'
|
||||
app_version: 84
|
||||
bucket: '8.00'
|
||||
bucket_count: 100
|
||||
channel: '*'
|
||||
client_agg_type: count
|
||||
count: 1
|
||||
key: ''
|
||||
metric: places_manager_write_query_count
|
||||
metric_type: counter
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
range_max: 3.0
|
||||
range_min: 2.0
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
- app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
client_id: e4b68766-0c07-4896-92d3-0f920dc202f0
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
client_id: 885422ea-a5fb-489e-b5ac-efa2d57d22f4
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
scalar_aggregates:
|
||||
- agg_type: count
|
||||
key: ''
|
||||
|
@ -13,9 +13,9 @@
|
|||
- app_build_id: '2020100100'
|
||||
app_version: 84
|
||||
channel: '*'
|
||||
client_id: 8d20cc29-7bd8-4595-85a0-43aecc9e5432
|
||||
os: '*'
|
||||
ping_type: '*'
|
||||
client_id: 8f206cdb-95d1-46d1-8295-7cc033c76b87
|
||||
os: Android
|
||||
ping_type: metrics
|
||||
scalar_aggregates:
|
||||
- agg_type: count
|
||||
key: ''
|
||||
|
|
|
@ -24,7 +24,7 @@ CLIENTS_SCALAR_AGGREGATES = [
|
|||
"key": "",
|
||||
"agg_type": "count",
|
||||
"value": 4.0,
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
{
|
||||
|
@ -41,7 +41,7 @@ CLIENTS_SCALAR_AGGREGATES = [
|
|||
"key": "",
|
||||
"agg_type": "count",
|
||||
"value": 8.0,
|
||||
},
|
||||
}
|
||||
],
|
||||
},
|
||||
]
|
||||
|
|
|
@ -32,9 +32,9 @@ EXPECT = [
|
|||
{
|
||||
"agg_type": "histogram",
|
||||
"aggregates": [
|
||||
{"key": "1.00", "value": 0.16666666666666666},
|
||||
{"key": "2.00", "value": 0.6666666666666666},
|
||||
{"key": "4.00", "value": 0.16666666666666666},
|
||||
{"key": "1.00", "value": 0.166_666_666_666_666_66},
|
||||
{"key": "2.00", "value": 0.666_666_666_666_666_6},
|
||||
{"key": "4.00", "value": 0.166_666_666_666_666_66},
|
||||
],
|
||||
"app_build_id": "*",
|
||||
"app_version": 84,
|
||||
|
|
|
@ -41,10 +41,7 @@ def main(test_name):
|
|||
# function. We'll also include dates in the future. There is a new
|
||||
# version every day.
|
||||
rows = [input_row(i, i, i) for i in range(-10, HISTORY_DAYS + 2)]
|
||||
yaml.dump(
|
||||
sorted(rows, key=lambda x: x["client_info"]["app_build"]) * 6,
|
||||
fp,
|
||||
)
|
||||
yaml.dump(sorted(rows, key=lambda x: x["client_info"]["app_build"]) * 6, fp)
|
||||
# bad rows, versions less than 100 put before and after the 100 mark. The
|
||||
# one for fenix will probably get filtered out because of the channel norm
|
||||
# udf.
|
||||
|
|
Загрузка…
Ссылка в новой задаче