Fix #1587 - fix inconsistent range_min and range_max in bucket counts (#1591)

* Fix egregious double counting in scalar bucket counts
* Update for newer version of black
* Update scalar bucket count test to account for combinations
* Update minimal test for histogram bucket counts
* Add test for multiple clients in histogram aggregates
* Remove deduplicated CTE in histogram bucket counts
* Use count distinct for client counts to be explicit
2020-12-04 14:47:45 -08:00 · 2020-12-04 14:47:45 -08:00 · ce9fe86ed2
--- a/bigquery_etl/glam/models.py
+++ b/bigquery_etl/glam/models.py
@ -104,12 +104,7 @@ def scalar_bucket_counts(**kwargs):
 def histogram_bucket_counts(**kwargs):
    """Variables for clients histogram bucket counts."""
    attributes_list = ["ping_type", "os", "app_version", "app_build_id", "channel"]
-    metric_attributes_list = [
-        "metric",
-        "metric_type",
-        "key",
-        "agg_type",
-    ]
+    metric_attributes_list = ["metric", "metric_type", "key", "agg_type"]
    fixed_attributes = ["app_version", "channel"]
    cubed_attributes = [x for x in attributes_list if x not in fixed_attributes]
    return dict(
--- a/bigquery_etl/glam/templates/histogram_bucket_counts_v1.sql
+++ b/bigquery_etl/glam/templates/histogram_bucket_counts_v1.sql
@ -1,9 +1,6 @@
 {{ header }}
 {% from 'macros.sql' import enumerate_table_combinations %}

-{# TODO: remove this import by factoring it out as a proper udf #}
-{% include "clients_histogram_aggregates_v1.udf.sql" %}
-
 WITH
 {{
    enumerate_table_combinations(
@ -13,20 +10,6 @@ WITH
        attribute_combinations
    )
 }},
-- Ensure there is a single record per client id
-deduplicated_combos AS (
-  SELECT
-    client_id,
-    {{ attributes }},
-    udf_merged_user_data(
-      ARRAY_CONCAT_AGG(histogram_aggregates)
-    ) AS histogram_aggregates
-  FROM
-    all_combos
-  GROUP BY
-    client_id,
-    {{ attributes }}
-),
 normalized_histograms AS (
  SELECT
    {{ attributes }},
@ -37,7 +20,7 @@ normalized_histograms AS (
      FROM unnest(histogram_aggregates)
    )AS histogram_aggregates
  FROM
-    deduplicated_combos
+    all_combos
 ),
 unnested AS (
  SELECT
--- a/bigquery_etl/glam/templates/scalar_bucket_counts_v1.sql
+++ b/bigquery_etl/glam/templates/scalar_bucket_counts_v1.sql
@ -2,9 +2,6 @@
 {% include "scalar_bucket_counts_v1.udf.sql" %}
 {% from 'macros.sql' import enumerate_table_combinations %}

-{# TODO: remove this import by factoring it out as a proper udf #}
-{% include "clients_scalar_aggregates_v1.udf.sql" %}
-
 WITH
 {{
    enumerate_table_combinations(
@ -14,20 +11,6 @@ WITH
        attribute_combinations
    )
 }},
-- Ensure there is a single record per client id
-deduplicated_combos AS (
-  SELECT
-    client_id,
-    {{ attributes }},
-    udf_merged_user_data(
-      ARRAY_CONCAT_AGG(scalar_aggregates)
-    ) AS scalar_aggregates
-  FROM
-    all_combos
-  GROUP BY
-    client_id,
-    {{ attributes }}
-),
 bucketed_booleans AS (
  SELECT
    client_id,
@ -37,17 +20,17 @@ bucketed_booleans AS (
    NULL AS bucket_count,
    udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates,
  FROM
-    deduplicated_combos
+    all_combos
 ),
 log_min_max AS (
  SELECT
    metric,
    key,
-    LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) range_min,
-    LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) range_max,
+    LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) as range_min,
+    LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) as range_max,
    100 as bucket_count
  FROM
-    deduplicated_combos
+    all_combos
    CROSS JOIN UNNEST(scalar_aggregates)
  WHERE
    metric_type <> "boolean"
@ -76,7 +59,7 @@ bucketed_scalars AS (
      FORMAT("%.*f", 2, mozfun.glam.histogram_bucket_from_value(buckets, value) + 0.0001)
      AS STRING) AS bucket
  FROM
-    deduplicated_combos
+    all_combos
  CROSS JOIN UNNEST(scalar_aggregates)
  LEFT JOIN buckets_by_metric
    USING(metric, key)
@ -119,7 +102,8 @@ SELECT
  range_max,
  bucket_count,
  bucket,
-  COUNT(*) AS count
+  -- we could rely on count(*) because there is one row per client and bucket
+  COUNT(DISTINCT client_id) AS count
 FROM
  booleans_and_scalars
 GROUP BY
--- a/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/query.sql
+++ b/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/query.sql
@ -1,43 +1,4 @@
 -- query for org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1;
-CREATE TEMP FUNCTION udf_merged_user_data(aggs ANY TYPE)
-RETURNS ARRAY<
-  STRUCT<
-    metric STRING,
-    metric_type STRING,
-    key STRING,
-    agg_type STRING,
-    value ARRAY<STRUCT<key STRING, value INT64>>
-  >
-> AS (
-  (
-    WITH unnested AS (
-      SELECT
-        *
-      FROM
-        UNNEST(aggs)
-    ),
-    aggregated_data AS (
-      SELECT AS STRUCT
-        metric,
-        metric_type,
-        key,
-        agg_type,
-        mozfun.map.sum(ARRAY_CONCAT_AGG(value)) AS value
-      FROM
-        unnested
-      GROUP BY
-        metric,
-        metric_type,
-        key,
-        agg_type
-    )
-    SELECT
-      ARRAY_AGG((metric, metric_type, key, agg_type, value))
-    FROM
-      aggregated_data
-  )
-);
-
 WITH
 -- Cross join with the attribute combinations to reduce the query complexity
 -- with respect to the number of operations. A table with n rows cross joined
@ -72,26 +33,6 @@ all_combos AS (
  CROSS JOIN
    static_combos combo
 ),
-- Ensure there is a single record per client id
-deduplicated_combos AS (
-  SELECT
-    client_id,
-    ping_type,
-    os,
-    app_version,
-    app_build_id,
-    channel,
-    udf_merged_user_data(ARRAY_CONCAT_AGG(histogram_aggregates)) AS histogram_aggregates
-  FROM
-    all_combos
-  GROUP BY
-    client_id,
-    ping_type,
-    os,
-    app_version,
-    app_build_id,
-    channel
-),
 normalized_histograms AS (
  SELECT
    ping_type,
@ -110,7 +51,7 @@ normalized_histograms AS (
        UNNEST(histogram_aggregates)
    ) AS histogram_aggregates
  FROM
-    deduplicated_combos
+    all_combos
 ),
 unnested AS (
  SELECT
--- a/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/query.sql
+++ b/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/query.sql
@ -89,92 +89,6 @@ RETURNS ARRAY<
  )
 );

-CREATE TEMP FUNCTION udf_merged_user_data(
-  aggs ARRAY<STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>>
-)
-RETURNS ARRAY<
-  STRUCT<metric STRING, metric_type STRING, key STRING, agg_type STRING, value FLOAT64>
-> AS (
-  (
-    WITH unnested AS (
-      SELECT
-        *
-      FROM
-        UNNEST(aggs)
-      WHERE
-        agg_type != "avg"
-    ),
-    aggregated AS (
-      SELECT
-        metric,
-        metric_type,
-        key,
-        agg_type,
-        --format:off
-        CASE agg_type
-          WHEN 'max' THEN max(value)
-          WHEN 'min' THEN min(value)
-          WHEN 'count' THEN sum(value)
-          WHEN 'sum' THEN sum(value)
-          WHEN 'false' THEN sum(value)
-          WHEN 'true' THEN sum(value)
-        END AS value
-        --format:on
-      FROM
-        unnested
-      WHERE
-        value IS NOT NULL
-      GROUP BY
-        metric,
-        metric_type,
-        key,
-        agg_type
-    ),
-    scalar_count_and_sum AS (
-      SELECT
-        metric,
-        metric_type,
-        key,
-        'avg' AS agg_type,
-        --format:off
-        CASE WHEN agg_type = 'count' THEN value ELSE 0 END AS count,
-        CASE WHEN agg_type = 'sum' THEN value ELSE 0 END AS sum
-        --format:on
-      FROM
-        aggregated
-      WHERE
-        agg_type IN ('sum', 'count')
-    ),
-    scalar_averages AS (
-      SELECT
-        * EXCEPT (count, sum),
-        SUM(sum) / SUM(count) AS agg_value
-      FROM
-        scalar_count_and_sum
-      GROUP BY
-        metric,
-        metric_type,
-        key,
-        agg_type
-    ),
-    merged_data AS (
-      SELECT
-        *
-      FROM
-        aggregated
-      UNION ALL
-      SELECT
-        *
-      FROM
-        scalar_averages
-    )
-    SELECT
-      ARRAY_AGG((metric, metric_type, key, agg_type, value))
-    FROM
-      merged_data
-  )
-);
-
 WITH
 -- Cross join with the attribute combinations to reduce the query complexity
 -- with respect to the number of operations. A table with n rows cross joined
@ -209,26 +123,6 @@ all_combos AS (
  CROSS JOIN
    static_combos combo
 ),
-- Ensure there is a single record per client id
-deduplicated_combos AS (
-  SELECT
-    client_id,
-    ping_type,
-    os,
-    app_version,
-    app_build_id,
-    channel,
-    udf_merged_user_data(ARRAY_CONCAT_AGG(scalar_aggregates)) AS scalar_aggregates
-  FROM
-    all_combos
-  GROUP BY
-    client_id,
-    ping_type,
-    os,
-    app_version,
-    app_build_id,
-    channel
-),
 bucketed_booleans AS (
  SELECT
    client_id,
@ -242,17 +136,17 @@ bucketed_booleans AS (
    NULL AS bucket_count,
    udf_boolean_buckets(scalar_aggregates) AS scalar_aggregates,
  FROM
-    deduplicated_combos
+    all_combos
 ),
 log_min_max AS (
  SELECT
    metric,
    key,
-    LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) range_min,
-    LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) range_max,
+    LOG(IF(MIN(value) <= 0, 1, MIN(value)), 2) AS range_min,
+    LOG(IF(MAX(value) <= 0, 1, MAX(value)), 2) AS range_max,
    100 AS bucket_count
  FROM
-    deduplicated_combos
+    all_combos
  CROSS JOIN
    UNNEST(scalar_aggregates)
  WHERE
@ -295,7 +189,7 @@ bucketed_scalars AS (
      FORMAT("%.*f", 2, mozfun.glam.histogram_bucket_from_value(buckets, value) + 0.0001) AS STRING
    ) AS bucket
  FROM
-    deduplicated_combos
+    all_combos
  CROSS JOIN
    UNNEST(scalar_aggregates)
  LEFT JOIN
@ -359,7 +253,8 @@ SELECT
  range_max,
  bucket_count,
  bucket,
-  COUNT(*) AS count
+  -- we could rely on count(*) because there is one row per client and bucket
+  COUNT(DISTINCT client_id) AS count
 FROM
  booleans_and_scalars
 GROUP BY
--- a/tests/sql/glam-fenix-dev/glam_etl/README.md
+++ b/tests/sql/glam-fenix-dev/glam_etl/README.md
@ -20,6 +20,13 @@ python org_mozilla_fenix_glam_nightly__extract_user_counts_v1/test_minimal/data.
 pytest -k extract_user_counts
 ```

+The easiest way to generate the SQL for testing is to run the following script
+from the project root.
+
+```bash
+GENERATE_ONLY=true script/glam/test/test_glean_org_mozilla_fenix_glam_nightly
+```
+
 ## Creating a new test

 To create a new test, copy the `test_minimal` directory and rename it to reflect
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1/test_incremental_aggregate/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1/test_incremental_aggregate/data.py
@ -32,7 +32,7 @@ CLIENTS_HISTOGRAM_AGGREGATES = [
                    {"key": "1", "value": 0},
                    {"key": "2", "value": 1},
                ],
-            },
+            }
        ],
    }
 ]
@ -53,11 +53,8 @@ CLIENTS_DAILY_HISTOGRAM_AGGREGATES = [
                "metric_type": "timing_distribution",
                "key": "",
                "agg_type": "summed_histogram",
-                "value": [
-                    {"key": "0", "value": 1},
-                    {"key": "1", "value": 0},
-                ],
-            },
+                "value": [{"key": "0", "value": 1}, {"key": "1", "value": 0}],
+            }
        ],
    }
 ]
@ -82,7 +79,7 @@ EXPECT = [
                    {"key": "1", "value": 0},
                    {"key": "2", "value": 1},
                ],
-            },
+            }
        ],
    }
 ]
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1/test_minimal/data.py
@ -46,7 +46,7 @@ CLIENTS_DAILY_HISTOGRAM_AGGREGATES = [
                    {"key": "112863206", "value": 1},
                    {"key": "123078199", "value": 0},
                ],
-            },
+            }
        ],
    }
 ]
@ -70,7 +70,7 @@ EXPECT = [
                    {"key": "112863206", "value": 1},
                    {"key": "123078199", "value": 0},
                ],
-            },
+            }
        ],
    }
 ]
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__extract_user_counts_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__extract_user_counts_v1/test_minimal/data.py
@ -18,7 +18,7 @@ VIEW_USER_COUNTS = [
        "app_build_id": APP_BUILD_ID,
        "channel": "*",
        "total_users": 44444,
-    },
+    }
 ]

 EXPECT = [
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/data.py
@ -1,6 +1,7 @@
 """Testing data for query."""

 from pathlib import Path
+from itertools import product

 import yaml

@ -9,16 +10,19 @@ ROOT = Path(__file__).parent
 UUID = "df735f02-efe5-4b07-b212-583bb99ba241"
 SUBMISSION_DATE = "2020-10-01"
 APP_BUILD_ID = "2020100100"
+OS = "Android"
+PING_TYPE = "metrics"

-# NOTE: what happens when channel = "*"?
+# See the scalar_bucket_counts minimal example for more details on the
+# preconditions.
 CLIENTS_HISTOGRAM_AGGREGATES = [
    {
        "sample_id": 1,
        "client_id": UUID,
-        "ping_type": "*",
-        "os": "*",
+        "ping_type": PING_TYPE,
+        "os": OS,
        "app_version": 84,
-        "app_build_id": "*",
+        "app_build_id": APP_BUILD_ID,
        "channel": "*",
        "histogram_aggregates": [
            {
@ -27,41 +31,34 @@ CLIENTS_HISTOGRAM_AGGREGATES = [
                "key": "",
                "agg_type": "summed_histogram",
                "value": [
-                    {"key": "112863206", "value": 1},
-                    {"key": "123078199", "value": 0},
+                    {"key": "1", "value": 1},
+                    {"key": "2", "value": 0},
                ],
-            },
+            }
        ],
    }
 ]

+BASE_ROW = {
+    "agg_type": "summed_histogram",
+    "app_build_id": "*",
+    "app_version": 84,
+    "channel": "*",
+    "key": "",
+    "metric": "network_tcp_connection",
+    "metric_type": "timing_distribution",
+    "os": "*",
+    "ping_type": "*",
+    "range_max": 2,
+    "record": {"key": "1", "value": 1.0},
+}
+
 EXPECT = [
-    {
-        "agg_type": "summed_histogram",
-        "app_build_id": "*",
-        "app_version": 84,
-        "channel": "*",
-        "key": "",
-        "metric": "network_tcp_connection",
-        "metric_type": "timing_distribution",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 123078199,
-        "record": {"key": "112863206", "value": 1.0},
-    },
-    {
-        "agg_type": "summed_histogram",
-        "app_build_id": "*",
-        "app_version": 84,
-        "channel": "*",
-        "key": "",
-        "metric": "network_tcp_connection",
-        "metric_type": "timing_distribution",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 123078199,
-        "record": {"key": "123078199", "value": 0.0},
-    },
+    {**BASE_ROW, **dict(zip(["record", "ping_type", "os", "app_build_id"], values))}
+    for values in product(
+        [{"key": "1", "value": 1.0}, {"key": "2", "value": 0.0}],
+        *zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3),
+    )
 ]

 prefix = "glam_etl"
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/expect.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/expect.yaml
@ -1,15 +1,15 @@
 - agg_type: summed_histogram
-  app_build_id: '*'
+  app_build_id: '2020100100'
  app_version: 84
  channel: '*'
  key: ''
  metric: network_tcp_connection
  metric_type: timing_distribution
-  os: '*'
-  ping_type: '*'
-  range_max: 123078199
-  record:
-    key: '112863206'
+  os: Android
+  ping_type: metrics
+  range_max: 2
+  record: &id001
+    key: '1'
    value: 1.0
 - agg_type: summed_histogram
  app_build_id: '*'
@ -18,9 +18,163 @@
  key: ''
  metric: network_tcp_connection
  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
  os: '*'
  ping_type: '*'
-  range_max: 123078199
-  record:
-    key: '123078199'
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 2
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 2
+  record: &id002
+    key: '2'
    value: 0.0
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 2
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 2
+  record: *id002
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_minimal/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml
@ -1,4 +1,4 @@
- app_build_id: '*'
+- app_build_id: '2020100100'
  app_version: 84
  channel: '*'
  client_id: df735f02-efe5-4b07-b212-583bb99ba241
@ -8,10 +8,10 @@
    metric: network_tcp_connection
    metric_type: timing_distribution
    value:
-    - key: '112863206'
+    - key: '1'
      value: 1
-    - key: '123078199'
+    - key: '2'
      value: 0
-  os: '*'
-  ping_type: '*'
+  os: Android
+  ping_type: metrics
  sample_id: 1
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/data.py
@ -0,0 +1,99 @@
+"""Testing data for query."""
+
+from pathlib import Path
+from itertools import product
+from uuid import uuid4
+
+import yaml
+
+ROOT = Path(__file__).parent
+
+SUBMISSION_DATE = "2020-10-01"
+APP_BUILD_ID = "2020100100"
+OS = "Android"
+PING_TYPE = "metrics"
+
+
+# NOTE: each client contributes a total of 1 to the final aggregate
+CLIENTS_HISTOGRAM_AGGREGATES = [
+    {
+        "sample_id": 1,
+        "client_id": str(uuid4()),
+        "ping_type": PING_TYPE,
+        "os": OS,
+        "app_version": 84,
+        "app_build_id": APP_BUILD_ID,
+        "channel": "*",
+        "histogram_aggregates": [
+            {
+                "metric": "network_tcp_connection",
+                "metric_type": "timing_distribution",
+                "key": "",
+                "agg_type": "summed_histogram",
+                "value": [
+                    {"key": "1", "value": 1},
+                    {"key": "2", "value": 0},
+                ],
+            }
+        ],
+    },
+    {
+        "sample_id": 1,
+        "client_id": str(uuid4()),
+        "ping_type": PING_TYPE,
+        "os": OS,
+        "app_version": 84,
+        "app_build_id": APP_BUILD_ID,
+        "channel": "*",
+        "histogram_aggregates": [
+            {
+                "metric": "network_tcp_connection",
+                "metric_type": "timing_distribution",
+                "key": "",
+                "agg_type": "summed_histogram",
+                "value": [
+                    {"key": "1", "value": 1},
+                    {"key": "3", "value": 1},
+                ],
+            }
+        ],
+    },
+]
+
+BASE_ROW = {
+    "agg_type": "summed_histogram",
+    "app_build_id": "*",
+    "app_version": 84,
+    "channel": "*",
+    "key": "",
+    "metric": "network_tcp_connection",
+    "metric_type": "timing_distribution",
+    "os": "*",
+    "ping_type": "*",
+    "range_max": 3,
+    "record": {"key": "1", "value": 1.0},
+}
+
+EXPECT = [
+    {**BASE_ROW, **dict(zip(["record", "ping_type", "os", "app_build_id"], values))}
+    for values in product(
+        [
+            {"key": "1", "value": 1.5},
+            {"key": "2", "value": 0.0},
+            {"key": "3", "value": 0.5},
+        ],
+        *zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3),
+    )
+]
+
+prefix = "glam_etl"
+tables = [
+    (
+        f"{prefix}.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml",
+        CLIENTS_HISTOGRAM_AGGREGATES,
+    ),
+    ("expect.yaml", EXPECT),
+]
+for name, data in tables:
+    with (ROOT / name).open("w") as fp:
+        yaml.dump(data, fp)
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/expect.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/expect.yaml
@ -0,0 +1,270 @@
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: &id001
+    key: '1'
+    value: 1.5
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id001
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: &id002
+    key: '2'
+    value: 0.0
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id002
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: &id003
+    key: '3'
+    value: 0.5
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: metrics
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: metrics
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: Android
+  ping_type: '*'
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id003
+- agg_type: summed_histogram
+  app_build_id: '*'
+  app_version: 84
+  channel: '*'
+  key: ''
+  metric: network_tcp_connection
+  metric_type: timing_distribution
+  os: '*'
+  ping_type: '*'
+  range_max: 3
+  record: *id003
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.schema.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.schema.yaml
@ -0,0 +1,47 @@
+- mode: NULLABLE
+  name: sample_id
+  type: INTEGER
+- mode: NULLABLE
+  name: client_id
+  type: STRING
+- mode: NULLABLE
+  name: ping_type
+  type: STRING
+- mode: NULLABLE
+  name: os
+  type: STRING
+- mode: NULLABLE
+  name: app_version
+  type: INTEGER
+- mode: NULLABLE
+  name: app_build_id
+  type: STRING
+- mode: NULLABLE
+  name: channel
+  type: STRING
+- fields:
+  - mode: NULLABLE
+    name: metric
+    type: STRING
+  - mode: NULLABLE
+    name: metric_type
+    type: STRING
+  - mode: NULLABLE
+    name: key
+    type: STRING
+  - mode: NULLABLE
+    name: agg_type
+    type: STRING
+  - fields:
+    - mode: NULLABLE
+      name: key
+      type: STRING
+    - mode: NULLABLE
+      name: value
+      type: INTEGER
+    mode: REPEATED
+    name: value
+    type: RECORD
+  mode: REPEATED
+  name: histogram_aggregates
+  type: RECORD
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/glam_etl.org_mozilla_fenix_glam_nightly__clients_histogram_aggregates_v1.yaml
@ -0,0 +1,34 @@
+- app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  client_id: 5c2364fc-46d4-4537-9575-a8ddaba2b42f
+  histogram_aggregates:
+  - agg_type: summed_histogram
+    key: ''
+    metric: network_tcp_connection
+    metric_type: timing_distribution
+    value:
+    - key: '1'
+      value: 1
+    - key: '2'
+      value: 0
+  os: Android
+  ping_type: metrics
+  sample_id: 1
+- app_build_id: '2020100100'
+  app_version: 84
+  channel: '*'
+  client_id: 813d66e1-57dc-46aa-84ec-682b48cc2e3c
+  histogram_aggregates:
+  - agg_type: summed_histogram
+    key: ''
+    metric: network_tcp_connection
+    metric_type: timing_distribution
+    value:
+    - key: '1'
+      value: 1
+    - key: '3'
+      value: 1
+  os: Android
+  ping_type: metrics
+  sample_id: 1
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/query_params.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__histogram_bucket_counts_v1/test_multiple_clients/query_params.yaml
@ -0,0 +1,12 @@
+- name: submission_date
+  type: DATE
+  value: '2020-10-01'
+- name: min_sample_id
+  type: INT64
+  value: 0
+- name: max_sample_id
+  type: INT64
+  value: 99
+- name: sample_size
+  type: INT64
+  value: 100
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/data.py
@ -1,6 +1,7 @@
 """Testing data for query."""
 from pathlib import Path
 from uuid import uuid4
+from itertools import product

 import yaml

@ -8,13 +9,18 @@ ROOT = Path(__file__).parent

 SUBMISSION_DATE = "2020-10-01"
 APP_BUILD_ID = "2020100100"
+OS = "Android"
+PING_TYPE = "metrics"

-# Other tests: non * fields
+# Testing precondition: ping_type, os, and app_build_id must not be "*". See
+# the scalar_bucket_counts parameters in models.py: these fields are used in
+# the static combinations. If these are set to "*", then they will be double
+# counted.
 CLIENTS_SCALAR_AGGREGATES = [
    {
        "client_id": str(uuid4()),
-        "ping_type": "*",
-        "os": "*",
+        "ping_type": PING_TYPE,
+        "os": OS,
        "app_version": 84,
        "app_build_id": APP_BUILD_ID,
        "channel": "*",
@ -30,8 +36,8 @@ CLIENTS_SCALAR_AGGREGATES = [
    },
    {
        "client_id": str(uuid4()),
-        "ping_type": "*",
-        "os": "*",
+        "ping_type": PING_TYPE,
+        "os": OS,
        "app_version": 84,
        "app_build_id": APP_BUILD_ID,
        "channel": "*",
@ -47,108 +53,36 @@ CLIENTS_SCALAR_AGGREGATES = [
    },
 ]

-# TODO: why are the range_min and range_max set at these values?
+# We must generate the set of combinations. Each one of these has the same
+# values, though.
+
+BASE_ROW = {
+    "agg_type": "histogram",
+    "app_build_id": "*",
+    "app_version": 84,
+    "bucket": "4.00",
+    "bucket_count": 100,
+    "channel": "*",
+    "client_agg_type": "count",
+    "count": 1,
+    "key": "",
+    "metric": "places_manager_write_query_count",
+    "metric_type": "counter",
+    "os": "*",
+    "ping_type": "*",
+    "range_max": 3.0,
+    "range_min": 2.0,
+}
+
+# Didn't intend to code golf. This enumerates all of the "static combinations"
+# by taking the cross product of all values. Each of these can take on a value
+# from each of the clients above. Since each attribute combination has a single
+# client, we do not have to change the "count" in the base row.
 EXPECT = [
-    {
-        "agg_type": "histogram",
-        "app_build_id": "*",
-        "app_version": 84,
-        "bucket": "16.00",
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "count",
-        "count": 1,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
-    {
-        "agg_type": "histogram",
-        "app_build_id": "*",
-        "app_version": 84,
-        "bucket": "32.00",
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "count",
-        "count": 1,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
-    {
-        "agg_type": "histogram",
-        "app_build_id": "*",
-        "app_version": 84,
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "avg",
-        "count": 2,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
-    {
-        "agg_type": "histogram",
-        "app_build_id": "2020100100",
-        "app_version": 84,
-        "bucket": "16.00",
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "count",
-        "count": 1,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
-    {
-        "agg_type": "histogram",
-        "app_build_id": "2020100100",
-        "app_version": 84,
-        "bucket": "32.00",
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "count",
-        "count": 1,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
-    {
-        "agg_type": "histogram",
-        "app_build_id": "2020100100",
-        "app_version": 84,
-        "bucket_count": 100,
-        "channel": "*",
-        "client_agg_type": "avg",
-        "count": 2,
-        "key": "",
-        "metric": "places_manager_write_query_count",
-        "metric_type": "counter",
-        "os": "*",
-        "ping_type": "*",
-        "range_max": 5.0,
-        "range_min": 0.0,
-    },
+    {**BASE_ROW, **dict(zip(["bucket", "ping_type", "os", "app_build_id"], values))}
+    for values in product(
+        ["4.00", "8.00"], *zip([PING_TYPE, OS, APP_BUILD_ID], ["*"] * 3)
+    )
 ]

 prefix = "glam_etl"
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/expect.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/expect.yaml
@ -1,51 +1,97 @@
- agg_type: histogram
-  app_build_id: '*'
-  app_version: 84
-  bucket: '16.00'
-  bucket_count: 100
-  channel: '*'
-  client_agg_type: count
-  count: 1
-  key: ''
-  metric: places_manager_write_query_count
-  metric_type: counter
-  os: '*'
-  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
- agg_type: histogram
-  app_build_id: '*'
-  app_version: 84
-  bucket: '32.00'
-  bucket_count: 100
-  channel: '*'
-  client_agg_type: count
-  count: 1
-  key: ''
-  metric: places_manager_write_query_count
-  metric_type: counter
-  os: '*'
-  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
- agg_type: histogram
-  app_build_id: '*'
-  app_version: 84
-  bucket_count: 100
-  channel: '*'
-  client_agg_type: avg
-  count: 2
-  key: ''
-  metric: places_manager_write_query_count
-  metric_type: counter
-  os: '*'
-  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
 - agg_type: histogram
  app_build_id: '2020100100'
  app_version: 84
-  bucket: '16.00'
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: '*'
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: '*'
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: '*'
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '4.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: '*'
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '4.00'
  bucket_count: 100
  channel: '*'
  client_agg_type: count
@ -55,12 +101,12 @@
  metric_type: counter
  os: '*'
  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
+  range_max: 3.0
+  range_min: 2.0
 - agg_type: histogram
-  app_build_id: '2020100100'
+  app_build_id: '*'
  app_version: 84
-  bucket: '32.00'
+  bucket: '4.00'
  bucket_count: 100
  channel: '*'
  client_agg_type: count
@ -70,19 +116,125 @@
  metric_type: counter
  os: '*'
  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
+  range_max: 3.0
+  range_min: 2.0
 - agg_type: histogram
  app_build_id: '2020100100'
  app_version: 84
+  bucket: '8.00'
  bucket_count: 100
  channel: '*'
-  client_agg_type: avg
-  count: 2
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: '*'
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: '*'
+  ping_type: metrics
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: '*'
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: Android
+  ping_type: '*'
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '2020100100'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
  key: ''
  metric: places_manager_write_query_count
  metric_type: counter
  os: '*'
  ping_type: '*'
-  range_max: 5.0
-  range_min: 0.0
+  range_max: 3.0
+  range_min: 2.0
+- agg_type: histogram
+  app_build_id: '*'
+  app_version: 84
+  bucket: '8.00'
+  bucket_count: 100
+  channel: '*'
+  client_agg_type: count
+  count: 1
+  key: ''
+  metric: places_manager_write_query_count
+  metric_type: counter
+  os: '*'
+  ping_type: '*'
+  range_max: 3.0
+  range_min: 2.0
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1.yaml
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_bucket_counts_v1/test_minimal/glam_etl.org_mozilla_fenix_glam_nightly__clients_scalar_aggregates_v1.yaml
@ -1,9 +1,9 @@
 - app_build_id: '2020100100'
  app_version: 84
  channel: '*'
-  client_id: e4b68766-0c07-4896-92d3-0f920dc202f0
-  os: '*'
-  ping_type: '*'
+  client_id: 885422ea-a5fb-489e-b5ac-efa2d57d22f4
+  os: Android
+  ping_type: metrics
  scalar_aggregates:
  - agg_type: count
    key: ''
@ -13,9 +13,9 @@
 - app_build_id: '2020100100'
  app_version: 84
  channel: '*'
-  client_id: 8d20cc29-7bd8-4595-85a0-43aecc9e5432
-  os: '*'
-  ping_type: '*'
+  client_id: 8f206cdb-95d1-46d1-8295-7cc033c76b87
+  os: Android
+  ping_type: metrics
  scalar_aggregates:
  - agg_type: count
    key: ''
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_percentiles_v1/test_minimal/data.py
@ -24,7 +24,7 @@ CLIENTS_SCALAR_AGGREGATES = [
                "key": "",
                "agg_type": "count",
                "value": 4.0,
-            },
+            }
        ],
    },
    {
@ -41,7 +41,7 @@ CLIENTS_SCALAR_AGGREGATES = [
                "key": "",
                "agg_type": "count",
                "value": 8.0,
-            },
+            }
        ],
    },
 ]
--- a/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py
+++ b/tests/sql/glam-fenix-dev/glam_etl/org_mozilla_fenix_glam_nightly__scalar_probe_counts_v1/test_minimal/data.py
@ -32,9 +32,9 @@ EXPECT = [
    {
        "agg_type": "histogram",
        "aggregates": [
-            {"key": "1.00", "value": 0.16666666666666666},
-            {"key": "2.00", "value": 0.6666666666666666},
-            {"key": "4.00", "value": 0.16666666666666666},
+            {"key": "1.00", "value": 0.166_666_666_666_666_66},
+            {"key": "2.00", "value": 0.666_666_666_666_666_6},
+            {"key": "4.00", "value": 0.166_666_666_666_666_66},
        ],
        "app_build_id": "*",
        "app_version": 84,
--- a/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py
+++ b/tests/sql/moz-fx-data-shared-prod/org_mozilla_fenix_derived/geckoview_version_v1/bootstrap.py
@ -41,10 +41,7 @@ def main(test_name):
        # function. We'll also include dates in the future. There is a new
        # version every day.
        rows = [input_row(i, i, i) for i in range(-10, HISTORY_DAYS + 2)]
-        yaml.dump(
-            sorted(rows, key=lambda x: x["client_info"]["app_build"]) * 6,
-            fp,
-        )
+        yaml.dump(sorted(rows, key=lambda x: x["client_info"]["app_build"]) * 6, fp)
    # bad rows, versions less than 100 put before and after the 100 mark. The
    # one for fenix will probably get filtered out because of the channel norm
    # udf.