Bug 1647328 - Schedule performance release criteria queries (#1596)

This commit is contained in:
Daniel Thorn 2021-03-10 11:27:32 -08:00 committed by GitHub
Parent 201e8b160e
Commit e0e013a20b
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 1062 additions and 0 deletions

Просмотреть файл

@ -117,6 +117,11 @@ SKIP = {
"sql/moz-fx-data-shared-prod/stripe_external/setup_intents_v1/init.sql",
"sql/moz-fx-data-shared-prod/stripe_external/plans_v1/init.sql",
"sql/moz-fx-data-shared-prod/stripe_external/prices_v1/init.sql",
"sql/moz-fx-data-bq-performance/release_criteria/dashboard_health_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/rc_flattened_test_data_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/release_criteria_summary_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/stale_tests_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/release_criteria_v1/query.sql",
# Already exists (and lacks an "OR REPLACE" clause)
"sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql", # noqa E501
"sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql", # noqa E501

Просмотреть файл

@ -391,3 +391,14 @@ bqetl_internal_tooling:
retry_delay: 30m
start_date: '2020-06-01'
schedule_interval: 0 4 * * *
# Daily DAG that rebuilds the performance release criteria tables in
# moz-fx-data-bq-performance.release_criteria.
bqetl_release_criteria:
  schedule_interval: daily
  default_args:
    owner: perf-pmo@mozilla.com
    start_date: "2020-12-03"
    email:
      - telemetry-alerts@mozilla.com
      - dthorn@mozilla.com
    retries: 2
    # 30 minutes between retry attempts
    retry_delay: 30m

Просмотреть файл

@ -0,0 +1,137 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
# Airflow DAG that schedules the performance release criteria queries in the
# moz-fx-data-bq-performance.release_criteria dataset. Do not edit by hand;
# regenerate from the metadata.yaml files via the script above.
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
import datetime
from utils.gcp import bigquery_etl_query, gke_command

docs = """
### bqetl_release_criteria
Built from bigquery-etl repo, [`dags/bqetl_release_criteria.py`](https://github.com/mozilla/bigquery-etl/blob/master/dags/bqetl_release_criteria.py)
#### Owner
perf-pmo@mozilla.com
"""

# Defaults applied to every task in this DAG.
default_args = {
    "owner": "perf-pmo@mozilla.com",
    "start_date": datetime.datetime(2020, 12, 3, 0, 0),
    "end_date": None,
    "email": ["telemetry-alerts@mozilla.com", "dthorn@mozilla.com"],
    "depends_on_past": False,
    # 30 minutes between retry attempts
    "retry_delay": datetime.timedelta(seconds=1800),
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
}

with DAG(
    "bqetl_release_criteria",
    default_args=default_args,
    schedule_interval="@daily",
    doc_md=docs,
) as dag:
    # Reports missing/duplicate/invalid release criteria definitions.
    # date_partition_parameter=None because each query overwrites the whole
    # destination table rather than a single date partition.
    release_criteria__dashboard_health__v1 = bigquery_etl_query(
        task_id="release_criteria__dashboard_health__v1",
        destination_table="dashboard_health_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Flattens raw perfherder data to one row per replicate; the root of the
    # dependency chain below.
    release_criteria__rc_flattened_test_data__v1 = bigquery_etl_query(
        task_id="release_criteria__rc_flattened_test_data__v1",
        destination_table="rc_flattened_test_data_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Aggregates flattened data into per-test release criteria values.
    release_criteria__release_criteria__v1 = bigquery_etl_query(
        task_id="release_criteria__release_criteria__v1",
        destination_table="release_criteria_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Dashboard-facing summary of the latest release criteria results.
    release_criteria__release_criteria_summary__v1 = bigquery_etl_query(
        task_id="release_criteria__release_criteria_summary__v1",
        destination_table="release_criteria_summary_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Reports tests whose latest run is older than allowed for their tier.
    release_criteria__stale_tests__v1 = bigquery_etl_query(
        task_id="release_criteria__stale_tests__v1",
        destination_table="stale_tests_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Dependency chain: flattened data feeds dashboard_health and
    # release_criteria; release_criteria feeds the summary and stale checks.
    release_criteria__dashboard_health__v1.set_upstream(
        release_criteria__rc_flattened_test_data__v1
    )

    release_criteria__release_criteria__v1.set_upstream(
        release_criteria__rc_flattened_test_data__v1
    )

    release_criteria__release_criteria_summary__v1.set_upstream(
        release_criteria__release_criteria__v1
    )

    release_criteria__stale_tests__v1.set_upstream(
        release_criteria__release_criteria__v1
    )

Просмотреть файл

@ -0,0 +1,17 @@
---
# Metadata for release_criteria.dashboard_health_v1.
description: >
  Firefox performance tests that do not have a defined release criteria or that
  are not explicitly excluded (missing_rc), have multiple release criteria
  defined for a single test (duplicate_rc), or specify the release criteria for
  a test that has not run in at least 28 days (invalid_rc).
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,74 @@
-- Dashboard health report for the performance release criteria.
-- Emits one row per problem found, with a `reason` of:
--   * duplicate_rc - more than one release criteria row is defined for a test
--   * missing_rc   - no criteria matched a test that ran in the last 28 days
--   * invalid_rc   - criteria exist for a test with no run in the last 28 days
WITH recent_tests AS (
  -- Distinct tests observed in the last 28 days of flattened test data.
  SELECT DISTINCT
    task_group_id,
    framework,
    platform,
    -- NULLs are normalized to '' so the FULL OUTER JOIN below can match on
    -- these columns (NULL never compares equal in a join condition).
    IFNULL(normalized_test_type, '') AS test_type,
    test_name,
    IFNULL(test_extra_options, '') AS test_extra_options,
    IFNULL(subtest_name, '') AS subtest_name
  FROM
    rc_flattened_test_data_v1
  WHERE
    task_group_time >= TIMESTAMP_SUB(current_timestamp, INTERVAL 28 DAY)
),
distinct_rc AS (
  -- Release criteria definitions reduced to one row per test key, with a
  -- count of how many criteria were defined for that key (> 1 is an error).
  SELECT
    framework,
    platform,
    IFNULL(test_type, '') AS test_type,
    test_name,
    IFNULL(test_extra_options, '') AS test_extra_options,
    IFNULL(subtest_name, '') AS subtest_name,
    ARRAY_TO_STRING(ARRAY_AGG(DISTINCT rc_test_name), '\n') AS rc_test_name,
    COUNT(*) AS defined_criteria
  FROM
    release_criteria_helper
  GROUP BY
    framework,
    platform,
    test_type,
    test_name,
    test_extra_options,
    subtest_name
)
-- FULL OUTER JOIN so that both unmatched criteria (invalid_rc) and unmatched
-- recent tests (missing_rc) survive into the result.
SELECT DISTINCT
  distinct_rc.rc_test_name,
  COALESCE(distinct_rc.framework, recent_tests.framework) AS framework,
  COALESCE(distinct_rc.platform, recent_tests.platform) AS platform,
  COALESCE(distinct_rc.test_type, recent_tests.test_type) AS test_type,
  COALESCE(distinct_rc.test_name, recent_tests.test_name) AS test_name,
  COALESCE(distinct_rc.test_extra_options, recent_tests.test_extra_options) AS test_extra_options,
  COALESCE(distinct_rc.subtest_name, recent_tests.subtest_name) AS subtest_name,
  CASE
    WHEN
      distinct_rc.defined_criteria > 1
    THEN
      'duplicate_rc'
    WHEN
      distinct_rc.rc_test_name IS NULL
    THEN
      'missing_rc'
    WHEN
      recent_tests.task_group_id IS NULL
    THEN
      'invalid_rc'
  END
  AS reason,
FROM
  distinct_rc
FULL OUTER JOIN
  recent_tests
USING
  (framework, platform, test_name, test_type, test_extra_options, subtest_name)
WHERE
  -- keep only rows that exhibit one of the three problems above
  distinct_rc.rc_test_name IS NULL
  OR distinct_rc.defined_criteria > 1
  OR recent_tests.task_group_id IS NULL
ORDER BY
  rc_test_name,
  framework,
  platform,
  test_type,
  test_name,
  test_extra_options

Просмотреть файл

@ -0,0 +1,14 @@
---
# Metadata for release_criteria.rc_flattened_test_data_v1.
friendly_name: Flattened Performance Test Data
description: Firefox performance test data flattened to 1 row per replicate
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,52 @@
-- Flattens perfherder test runs to one row per subtest replicate, covering
-- the last 52 weeks of '-shippable' builds for the projects and frameworks
-- included by the udf.is_included_* helpers.
SELECT
  test_run.taskGroupId AS task_group_id,
  -- Latest run time within the task group, repeated on every row of the group.
  MAX(test_run.time) OVER (PARTITION BY test_run.taskGroupId) AS task_group_time,
  test_run.platform,
  `moz-fx-data-bq-performance.udf.get_form_factor`(test_run.platform) AS normalized_form_factor,
  `moz-fx-data-bq-performance.udf.get_device_name`(test_run.platform) AS normalized_device_name,
  `moz-fx-data-bq-performance.udf.get_os_name`(test_run.platform) AS normalized_device_os,
  test_run.framework,
  test_run.project,
  test_run.tier AS test_tier,
  test_run.recordingDate AS recording_date,
  test_run.application.name AS app_name,
  test_run.application.version AS app_version,
  `moz-fx-data-bq-performance.udf.get_version_part`(
    test_run.application.version,
    0
  ) AS normalized_app_major_version,
  test_suite.name AS test_name,
  -- todo Use only test_suite.type once https://bugzilla.mozilla.org/show_bug.cgi?id=1645197 lands
  `moz-fx-data-bq-performance.udf.get_normalized_test_type`(
    test_run.framework,
    test_run.type,
    test_suite.type
  ) AS normalized_test_type,
  test_suite.lowerIsBetter AS test_lower_is_better,
  test_suite.value AS test_value,
  test_suite.unit AS test_unit,
  -- Sort the options so equivalent option sets compare equal as strings.
  ARRAY_TO_STRING(
    ARRAY(SELECT x FROM UNNEST(test_suite.extraOptions) AS x ORDER BY x),
    '|'
  ) AS test_extra_options,
  subtest.name AS subtest_name,
  subtest.value AS subtest_value,
  subtest.lowerIsBetter AS subtest_lower_is_better,
  subtest.unit AS subtest_unit,
  subtest_replicate,
  -- Position of the replicate within its subtest; used by release_criteria_v1
  -- to skip the first rc_ignore_first_replicates replicates.
  subtest_replicate_offset,
  test_run.type AS test_type
FROM
  `moz-fx-data-taskclu-prod-8fbf.taskclusteretl.perfherder` AS test_run
-- LEFT JOIN UNNEST keeps runs whose arrays are empty (columns become NULL).
LEFT JOIN
  UNNEST(suites) AS test_suite
LEFT JOIN
  UNNEST(test_suite.subtests) AS subtest
LEFT JOIN
  UNNEST(subtest.replicates) AS subtest_replicate
  WITH OFFSET AS subtest_replicate_offset
WHERE
  test_run.time >= TIMESTAMP_SUB(current_timestamp, INTERVAL 52 * 7 DAY)
  AND `moz-fx-data-bq-performance.udf.is_included_project`(test_run.project)
  AND `moz-fx-data-bq-performance.udf.is_included_framework`(test_run.framework)
  -- shippable builds only
  AND STRPOS(test_run.platform, '-shippable') > 0

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for a release_criteria table scheduled in bqetl_release_criteria.
description: >
  Firefox performance test data related to performance release criteria for
  Desktop and Mobile
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,198 @@
-- Summarizes the most recent release criteria results for the performance
-- dashboard: one row per test with its current value, target comparison, and
-- formatted comparisons against the 1-4 week prior moving averages.
WITH stage_1 AS (
  -- Most recent task group per (project, platform, app_name, rc_tier,
  -- rc_test_name), plus percent change vs each prior-week average.
  SELECT
    project,
    normalized_form_factor,
    normalized_device_name,
    normalized_device_os,
    rc_tier,
    rc_test_name,
    app_name,
    rc_value,
    rc_mean_subtest_stddev,
    test_lower_is_better,
    rc_one_week_prior_average,
    rc_two_week_prior_average,
    rc_three_week_prior_average,
    rc_four_week_prior_average,
    rc_one_week_prior_mean_stddev,
    rc_two_week_prior_mean_stddev,
    rc_three_week_prior_mean_stddev,
    rc_four_week_prior_mean_stddev,
    task_group_time,
    task_group_id,
    test_tier,
    SAFE_DIVIDE(rc_one_week_prior_average - rc_value, rc_value) AS one_week_pct_change,
    SAFE_DIVIDE(rc_two_week_prior_average - rc_value, rc_value) AS two_week_pct_change,
    SAFE_DIVIDE(rc_three_week_prior_average - rc_value, rc_value) AS three_week_pct_change,
    SAFE_DIVIDE(rc_four_week_prior_average - rc_value, rc_value) AS four_week_pct_change,
  FROM
    release_criteria_v1 AS a
  WHERE
    -- keep only the latest task group for each test series
    task_group_time = (
      SELECT
        MAX(task_group_time)
      FROM
        release_criteria_v1 AS b
      WHERE
        a.project = b.project
        AND a.platform = b.platform
        AND a.app_name = b.app_name
        AND a.rc_tier = b.rc_tier
        AND a.rc_test_name = b.rc_test_name
    )
),
stage_2 AS (
  -- Join each real result to its synthetic 'target*' row (if any) and run the
  -- significance UDF against the target and each prior-week average.
  SELECT
    a.*,
    b.rc_value AS target_value,
    b.rc_mean_subtest_stddev AS target_stddev,
    SAFE_DIVIDE(b.rc_value - a.rc_value, a.rc_value) AS target_pct_diff,
    -- Array order matters: OFFSET 0 = target, 1-4 = one..four weeks prior;
    -- stage_4 unpacks the array positionally in that order.
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.is_change_significant(
          a.rc_value,
          a.rc_mean_subtest_stddev,
          mean_old,
          stddev_old
        )
      FROM
        UNNEST(
          ARRAY<STRUCT<mean_old FLOAT64, stddev_old FLOAT64>>[
            (b.rc_value, b.rc_mean_subtest_stddev),
            (a.rc_one_week_prior_average, a.rc_one_week_prior_mean_stddev),
            (a.rc_two_week_prior_average, a.rc_two_week_prior_mean_stddev),
            (a.rc_three_week_prior_average, a.rc_three_week_prior_mean_stddev),
            (a.rc_four_week_prior_average, a.rc_four_week_prior_mean_stddev)
          ]
        )
    ) AS change_significant,
  FROM
    stage_1 AS a
  LEFT JOIN
    (
      -- synthetic target rows produced by release_criteria_v1
      SELECT DISTINCT
        task_group_id,
        project,
        normalized_device_name,
        normalized_device_os,
        rc_tier,
        rc_test_name,
        rc_value,
        rc_mean_subtest_stddev,
      FROM
        stage_1
      WHERE
        STARTS_WITH(app_name, 'target')
    ) AS b
  USING
    (task_group_id, project, normalized_device_name, normalized_device_os, rc_tier, rc_test_name)
  WHERE
    NOT STARTS_WITH(app_name, 'target')
),
stage_3 AS (
  -- Interpret each significance result as regression/improvement/unchanged,
  -- taking the test's direction (test_lower_is_better) into account.
  SELECT
    *,
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.interpret_change(direction, test_lower_is_better)
      FROM
        UNNEST(change_significant)
    ) AS interpreted_change,
  FROM
    stage_2
),
stage_4 AS (
  -- Unpack the comparison arrays into named columns and format for display.
  SELECT
    *,
    change_significant[OFFSET(0)] AS is_target_diff_significant,
    change_significant[OFFSET(1)] AS is_one_week_change_significant,
    change_significant[OFFSET(2)] AS is_two_week_change_significant,
    change_significant[OFFSET(3)] AS is_three_week_change_significant,
    change_significant[OFFSET(4)] AS is_four_week_change_significant,
    interpreted_change[OFFSET(0)] AS vs_target,
    interpreted_change[OFFSET(1)] AS vs_one_week_prior,
    interpreted_change[OFFSET(2)] AS vs_two_week_prior,
    interpreted_change[OFFSET(3)] AS vs_three_week_prior,
    interpreted_change[OFFSET(4)] AS vs_four_week_prior,
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.get_formatted_comparison(
          value,
          pct,
          _interpreted_change,
          confidence
        )
      FROM
        UNNEST(
          ARRAY<STRUCT<value FLOAT64, pct FLOAT64, _interpreted_change STRING, confidence FLOAT64>>[
            (
              target_value,
              target_pct_diff,
              interpreted_change[OFFSET(0)],
              change_significant[OFFSET(0)].confidence
            ),
            (
              rc_one_week_prior_average,
              one_week_pct_change,
              interpreted_change[OFFSET(1)],
              change_significant[OFFSET(1)].confidence
            ),
            (
              rc_two_week_prior_average,
              two_week_pct_change,
              interpreted_change[OFFSET(2)],
              change_significant[OFFSET(2)].confidence
            ),
            (
              rc_three_week_prior_average,
              three_week_pct_change,
              interpreted_change[OFFSET(3)],
              change_significant[OFFSET(3)].confidence
            ),
            (
              rc_four_week_prior_average,
              four_week_pct_change,
              interpreted_change[OFFSET(4)],
              change_significant[OFFSET(4)].confidence
            )
          ]
        )
    ) AS formatted_comparison,
  FROM
    stage_3
)
SELECT
  project,
  normalized_form_factor,
  normalized_device_name,
  normalized_device_os,
  rc_tier,
  rc_test_name,
  app_name,
  ROUND(rc_value, 2) AS `current`,
  CONCAT(
    DATE(task_group_time),
    -- NOTE(review): both IF branches are empty strings; a stale-test marker
    -- (e.g. a warning glyph) appears to have been lost in transit -- confirm
    -- the intended marker against the upstream repo.
    IF(`moz-fx-data-bq-performance.udf.is_stale_test`(task_group_time, test_tier), '', '')
  ) AS last_updated,
  formatted_comparison[OFFSET(0)] AS target,
  formatted_comparison[OFFSET(1)] AS one_week_prior,
  formatted_comparison[OFFSET(2)] AS two_week_prior,
  formatted_comparison[OFFSET(3)] AS three_week_prior,
  formatted_comparison[OFFSET(4)] AS four_week_prior,
  -- Fixed: the *_unchanged columns previously tested 'improvement' and the
  -- *_improvement columns tested 'unchanged'; each condition now matches its
  -- column name. Column names (the external interface) are unchanged.
  IF(vs_target = 'regression', rc_test_name, NULL) AS vs_target_regression,
  IF(vs_target = 'unchanged', rc_test_name, NULL) AS vs_target_unchanged,
  IF(vs_target = 'improvement', rc_test_name, NULL) AS vs_target_improvement,
  IF(vs_one_week_prior = 'regression', rc_test_name, NULL) AS vs_one_week_prior_regression,
  IF(vs_one_week_prior = 'unchanged', rc_test_name, NULL) AS vs_one_week_prior_unchanged,
  IF(vs_one_week_prior = 'improvement', rc_test_name, NULL) AS vs_one_week_prior_improvement,
  IF(vs_two_week_prior = 'regression', rc_test_name, NULL) AS vs_two_week_prior_regression,
  IF(vs_two_week_prior = 'unchanged', rc_test_name, NULL) AS vs_two_week_prior_unchanged,
  IF(vs_two_week_prior = 'improvement', rc_test_name, NULL) AS vs_two_week_prior_improvement,
  IF(vs_three_week_prior = 'regression', rc_test_name, NULL) AS vs_three_week_prior_regression,
  IF(vs_three_week_prior = 'unchanged', rc_test_name, NULL) AS vs_three_week_prior_unchanged,
  IF(vs_three_week_prior = 'improvement', rc_test_name, NULL) AS vs_three_week_prior_improvement,
  IF(vs_four_week_prior = 'regression', rc_test_name, NULL) AS vs_four_week_prior_regression,
  IF(vs_four_week_prior = 'unchanged', rc_test_name, NULL) AS vs_four_week_prior_unchanged,
  IF(vs_four_week_prior = 'improvement', rc_test_name, NULL) AS vs_four_week_prior_improvement,
FROM
  stage_4

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for a release_criteria table scheduled in bqetl_release_criteria.
description: >
  Firefox performance test data related to performance release criteria for
  Desktop and Mobile
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,486 @@
-- Builds release_criteria_v1: aggregates flattened performance test data up
-- the hierarchy replicate -> subtest -> test -> release-criteria test, adds
-- synthetic target rows, the tested Firefox version, and 1-4 week prior
-- moving averages per test series.
WITH rc_included_tests AS (
  -- Filter and correct table that defines the aggregation for release criteria
  SELECT
    * REPLACE (
      IFNULL(rc_tier, 5) AS rc_tier,
      IFNULL(rc_test_aggregator, 'use_existing') AS rc_test_aggregator,
      IFNULL(rc_subtest_aggregator, 'use_existing') AS rc_subtest_aggregator,
      IFNULL(rc_replicate_aggregator, 'use_existing') AS rc_replicate_aggregator,
      IFNULL(rc_ignore_first_replicates, 0) AS rc_ignore_first_replicates,
      IFNULL(rc_target_type, 'none') AS rc_target_type,
      IFNULL(test_extra_options, '') AS test_extra_options
    )
  FROM
    release_criteria_helper
  WHERE
    rc_test_name IS NOT NULL
    AND rc_test_name != 'exclude'
),
rc_test_data AS (
  -- Join the flattened test data with the table that defines the aggregation
  -- for release criteria.
  SELECT
    flattened.* EXCEPT (subtest_replicate_offset, test_type),
    -- Single string key identifying one release-criteria aggregation group.
    CONCAT(
      flattened.task_group_id,
      flattened.project,
      flattened.platform,
      COALESCE(flattened.app_name, ''),
      included_rc.rc_tier,
      included_rc.rc_test_name
    ) AS rc_test_aggregation_key,
    included_rc.rc_test_name,
    included_rc.rc_tier,
    included_rc.rc_test_aggregator,
    included_rc.rc_subtest_aggregator,
    included_rc.rc_replicate_aggregator,
    included_rc.rc_ignore_first_replicates,
    included_rc.rc_target_type,
    included_rc.rc_target_value,
    included_rc.rc_target_app,
  FROM
    rc_flattened_test_data_v1 AS flattened
  LEFT JOIN
    rc_included_tests AS included_rc
  ON
    flattened.framework = included_rc.framework
    AND flattened.platform = included_rc.platform
    AND flattened.normalized_test_type = included_rc.test_type
    AND flattened.test_name = included_rc.test_name
    AND flattened.test_extra_options = included_rc.test_extra_options
    AND flattened.subtest_name = included_rc.subtest_name
  WHERE
    -- Skip the first N (warm-up) replicates. NOTE(review): this predicate is
    -- NULL for rows with no matching release criteria, so those rows are
    -- dropped and the LEFT JOIN effectively becomes INNER -- presumably
    -- intended (only tests with criteria are aggregated); confirm.
    rc_ignore_first_replicates < (subtest_replicate_offset + 1)
),
kurtosis_parts AS (
  -- Aggregate replicates into one value per subtest (per the configured
  -- rc_replicate_aggregator) and accumulate the power sums needed for the
  -- skewness/kurtosis UDFs.
  SELECT
    * EXCEPT (subtest_replicate),
    CASE
      rc_replicate_aggregator
      WHEN 'use_existing' THEN subtest_value
      WHEN 'mean' THEN AVG(subtest_replicate) OVER (replicate_window)
      WHEN 'median' THEN PERCENTILE_CONT(subtest_replicate, 0.5) OVER (replicate_window)
      -- Adding 1 to the replicate value then subtracting 1 from the result
      -- ensures that a value is always returned. Maintained to be consistent
      -- with how the perf test framework calculates geomean:
      -- https://searchfox.org/mozilla-central/source/testing/talos/talos/filter.py#174
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_replicate_aggregator = 'geomean', subtest_replicate + 1, NULL))) OVER (
          replicate_window
        )
      ) - 1
      WHEN 'max' THEN MAX(subtest_replicate) OVER (replicate_window)
      WHEN 'min' THEN MIN(subtest_replicate) OVER (replicate_window)
      WHEN 'sum' THEN SUM(subtest_replicate) OVER (replicate_window)
      ELSE ERROR(CONCAT('Unknown replicate aggregator: ', rc_replicate_aggregator))
    END AS rc_subtest_value,
    AVG(subtest_replicate) OVER (replicate_window) AS rc_replicate_mean,
    STDDEV_SAMP(subtest_replicate) OVER (replicate_window) AS rc_replicate_stddev,
    COUNT(subtest_replicate) OVER (replicate_window) AS rc_replicate_count,
    TO_JSON_STRING(ARRAY_AGG(subtest_replicate) OVER (replicate_window)) AS replicate_values_json,
    -- Power sums consumed by udf.skewness / udf.kurtosis below.
    SUM(1.0 * subtest_replicate) OVER (replicate_window) AS rx,
    SUM(POWER(1.0 * subtest_replicate, 2)) OVER (replicate_window) AS rx2,
    SUM(POWER(1.0 * subtest_replicate, 3)) OVER (replicate_window) AS rx3,
    SUM(POWER(1.0 * subtest_replicate, 4)) OVER (replicate_window) AS rx4,
  FROM
    rc_test_data
  WINDOW
    replicate_window AS (
      PARTITION BY
        rc_test_aggregation_key,
        test_name,
        subtest_name
    )
),
subtests AS (
  -- Aggregate the in-scope replicates and calculate summary statistics
  SELECT DISTINCT
    * EXCEPT (rx, rx2, rx3, rx4),
    `moz-fx-data-bq-performance.udf.skewness`(
      rc_replicate_count,
      rc_replicate_mean,
      rc_replicate_stddev,
      rx,
      rx2,
      rx3
    ) AS rc_replicate_skewness,
    `moz-fx-data-bq-performance.udf.kurtosis`(
      rc_replicate_count,
      rc_replicate_mean,
      rc_replicate_stddev,
      rx,
      rx2,
      rx3,
      rx4
    ) AS rc_replicate_kurtosis,
  FROM
    kurtosis_parts
),
tests AS (
  -- Aggregate the in-scope sub tests and calculate the stddev and count
  SELECT DISTINCT
    * EXCEPT (
      rc_subtest_value,
      rc_replicate_mean,
      rc_replicate_stddev,
      rc_replicate_count,
      test_tier,
      subtest_name,
      subtest_value,
      subtest_lower_is_better,
      subtest_unit
    ),
    CASE
      rc_subtest_aggregator
      WHEN 'use_existing' THEN rc_subtest_value
      WHEN 'mean' THEN AVG(rc_subtest_value) OVER (subtest_window)
      WHEN 'median' THEN PERCENTILE_CONT(rc_subtest_value, 0.5) OVER (subtest_window)
      -- geomean: see the note in kurtosis_parts above.
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_subtest_aggregator = 'geomean', rc_subtest_value + 1, NULL))) OVER (
          subtest_window
        )
      ) - 1
      WHEN 'max' THEN MAX(rc_subtest_value) OVER (subtest_window)
      WHEN 'min' THEN MIN(rc_subtest_value) OVER (subtest_window)
      WHEN 'sum' THEN SUM(rc_subtest_value) OVER (subtest_window)
      ELSE ERROR(CONCAT('Unknown subtest aggregator: ', rc_subtest_aggregator))
    END AS rc_test_value,
    AVG(rc_replicate_mean) OVER (subtest_window) AS rc_subtest_mean,
    SQRT(
      SUM(POWER(rc_replicate_stddev, 2)) OVER (subtest_window)
    ) AS rc_propagated_error,  -- Propagate error through aggregation
    MAX(rc_replicate_stddev) OVER (subtest_window) AS rc_max_subtest_stddev,
    AVG(rc_replicate_stddev) OVER (subtest_window) AS rc_mean_subtest_stddev,
    COUNT(DISTINCT subtest_name) OVER (subtest_window) AS rc_subtest_count,
    -- AVG(rc_replicate_count) OVER(subtest_window) AS rc_subtest_avg_replicates,
    -- Would prefer to just save the array as a JSON string but trying to use
    -- `ARRAY_CONCAT_AGG` in the next stage produces an unsupported error
    ARRAY_TO_STRING(
      ARRAY_AGG(CAST(rc_replicate_count AS STRING)) OVER (subtest_window),
      ','
    ) AS rc_subtest_replicate_counts,
    MIN(test_tier) OVER (subtest_window) AS test_tier,
  FROM
    subtests
  WINDOW
    subtest_window AS (
      PARTITION BY
        rc_test_aggregation_key,
        test_name
    )
),
rc_tests AS (
  -- Aggregate the in-scope tests into one release-criteria value per
  -- aggregation key and calculate the stddev and count.
  SELECT DISTINCT
    * EXCEPT (
      rc_test_value,
      rc_subtest_mean,
      rc_propagated_error,
      rc_max_subtest_stddev,
      rc_mean_subtest_stddev,
      rc_subtest_count,
      rc_subtest_replicate_counts,
      test_name,
      test_value,
      test_tier
    ),
    -- Fixed: this level aggregates tests, so the CASE switches on
    -- rc_test_aggregator. The original switched on rc_subtest_aggregator,
    -- which contradicted the rc_test_aggregator guard inside the geomean
    -- branch below and applied the wrong aggregator whenever the two differ.
    CASE
      rc_test_aggregator
      WHEN 'use_existing' THEN rc_test_value
      WHEN 'mean' THEN AVG(rc_test_value) OVER (test_window)
      WHEN 'median' THEN PERCENTILE_CONT(rc_test_value, 0.5) OVER (test_window)
      -- geomean: see the note in kurtosis_parts above.
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_test_aggregator = 'geomean', rc_test_value + 1, NULL))) OVER (test_window)
      ) - 1
      WHEN 'max' THEN MAX(rc_test_value) OVER (test_window)
      WHEN 'min' THEN MIN(rc_test_value) OVER (test_window)
      WHEN 'sum' THEN SUM(rc_test_value) OVER (test_window)
      ELSE ERROR(CONCAT('Unknown test aggregator: ', rc_test_aggregator))
    END AS rc_value,
    AVG(rc_subtest_mean) OVER (test_window) AS rc_test_mean,
    SQRT(
      SUM(POWER(rc_propagated_error, 2)) OVER (test_window)
    ) AS rc_propagated_error,  -- Propagate error through aggregation
    MAX(rc_max_subtest_stddev) OVER (test_window) AS rc_max_subtest_stddev,
    AVG(rc_mean_subtest_stddev) OVER (test_window) AS rc_mean_subtest_stddev,
    COUNT(DISTINCT test_name) OVER (test_window) AS rc_test_count,
    AVG(rc_subtest_count) OVER (test_window) AS rc_test_avg_subtests,
    ARRAY_TO_STRING(
      ARRAY_AGG(CAST(rc_subtest_replicate_counts AS STRING)) OVER (test_window),
      ','
    ) AS rc_test_replicate_counts,
    MIN(test_tier) OVER (test_window) AS test_tier,
  FROM
    tests
  WINDOW
    test_window AS (
      PARTITION BY
        rc_test_aggregation_key
    )
),
versions AS (
  -- Max major version per (date, release channel category) from the release
  -- calendar.
  SELECT DISTINCT
    date_utc,
    category,
    MAX(`moz-fx-data-bq-performance`.udf.get_version_part(version, 0)) AS major_version,
  FROM
    `dp2-prod`.sumo.release_calendar
  GROUP BY
    date_utc,
    category
),
rc_tests_with_tested_version AS (
  -- Order the columns and reduce to the distinct aggregated tests (quality
  -- control on calculations is possible on aggregation temporary tables)
  SELECT DISTINCT
    task_group_time,
    platform,
    normalized_form_factor,
    normalized_device_name,
    normalized_device_os,
    framework,
    project,
    rc_tier,
    recording_date,
    app_name,
    app_version,
    normalized_app_major_version,
    rc_test_name,
    normalized_test_type,
    rc_value,
    rc_propagated_error,
    rc_max_subtest_stddev,
    rc_mean_subtest_stddev,
    rc_test_count,
    rc_test_avg_subtests,
    TO_JSON_STRING(
      (
        SELECT
          APPROX_QUANTILES(CAST(x AS INT64), 3)
        FROM
          UNNEST(SPLIT(rc_test_replicate_counts, ',')) AS x
      )
    ) AS replicate_quantiles,
    rc_target_type,
    rc_target_value,
    rc_target_app,
    test_lower_is_better,
    test_unit,
    -- Trunk/nightly projects ship as the next major version.
    IF(
      project IN ('fenix', 'mozilla-central', 'autoland'),
      major_version + 1,
      major_version
    ) AS firefox_version,
    rc_test_aggregation_key,
    task_group_id,
    test_tier
  FROM
    rc_tests
  LEFT JOIN
    versions
  ON
    date_utc = DATE(task_group_time)
    AND (
      (project IN ('fenix', 'mozilla-central', 'mozilla-beta', 'autoland') AND category = 'dev')
      OR (project = 'mozilla-release' AND category = 'stability')
      OR (project = 'mozilla-esr68' AND category = 'esr')
    )
  WHERE
    rc_value IS NOT NULL
),
build_targets AS (
  -- Builds the results rows for tests with a relative or absolute target value
  SELECT
    * REPLACE (
      CASE
        WHEN rc_target_type = 'absolute' THEN 'target'
        WHEN rc_target_type = 'relative' AND rc_target_app IS NOT NULL THEN
          CONCAT('target (vs ', rc_target_app, ')')
        WHEN rc_target_type = 'relative' AND rc_target_app IS NULL THEN
          -- misconfiguration: fail the query loudly rather than emit bad rows
          ERROR(
            CONCAT(
              'Relative target with null rc_target_app: ',
              platform,
              framework,
              project,
              rc_test_name
            )
          )
      END AS app_name,
      CASE
        WHEN rc_target_type = 'absolute' THEN rc_target_value
        WHEN rc_target_type = 'relative' THEN rc_target_value * rc_value
      END AS rc_value
    )
  FROM
    rc_tests_with_tested_version
  WHERE
    rc_target_type = 'absolute'
    OR (rc_target_type = 'relative' AND rc_target_app = app_name)
),
rc_tests_with_targets AS (
  -- Real results plus the synthetic 'target*' rows.
  SELECT
    *
  FROM
    rc_tests_with_tested_version
  UNION ALL
  SELECT
    *
  FROM
    build_targets
)
SELECT DISTINCT
  *,
  -- Day-based RANGE frames: each "N week prior" window covers days
  -- 7*N..7*N+6 before the current row within its test series.
  AVG(rc_value) OVER (one_week_window) AS rc_one_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (one_week_window)
  ) AS rc_one_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (one_week_window) AS rc_one_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (one_week_window) AS rc_one_week_prior_mean_stddev,
  COUNT(rc_value) OVER (one_week_window) AS rc_one_week_prior_count,
  AVG(rc_value) OVER (two_week_window) AS rc_two_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (two_week_window)
  ) AS rc_two_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (two_week_window) AS rc_two_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (two_week_window) AS rc_two_week_prior_mean_stddev,
  COUNT(rc_value) OVER (two_week_window) AS rc_two_week_prior_count,
  AVG(rc_value) OVER (three_week_window) AS rc_three_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (three_week_window)
  ) AS rc_three_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (three_week_window) AS rc_three_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (three_week_window) AS rc_three_week_prior_mean_stddev,
  COUNT(rc_value) OVER (three_week_window) AS rc_three_week_prior_count,
  AVG(rc_value) OVER (four_week_window) AS rc_four_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (four_week_window)
  ) AS rc_four_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (four_week_window) AS rc_four_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (four_week_window) AS rc_four_week_prior_mean_stddev,
  COUNT(rc_value) OVER (four_week_window) AS rc_four_week_prior_count,
FROM
  rc_tests_with_targets
WINDOW
  moving_average_window AS (
    PARTITION BY
      project,
      platform,
      rc_tier,
      rc_test_name,
      app_name
    ORDER BY
      UNIX_DATE(DATE(task_group_time)) ASC
  ),
  one_week_window AS (
    moving_average_window
    RANGE BETWEEN 13 PRECEDING AND 7 PRECEDING
  ),
  two_week_window AS (
    moving_average_window
    RANGE BETWEEN 20 PRECEDING AND 14 PRECEDING
  ),
  three_week_window AS (
    moving_average_window
    RANGE BETWEEN 27 PRECEDING AND 21 PRECEDING
  ),
  four_week_window AS (
    moving_average_window
    RANGE BETWEEN 34 PRECEDING AND 28 PRECEDING
  )
ORDER BY
  task_group_time DESC,
  rc_test_aggregation_key

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for release_criteria.stale_tests_v1.
description: >
  Firefox performance tests that have not run as recently as expected by their
  test tier
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,23 @@
-- Performance tests whose most recent run is older than allowed for their
-- test tier (per udf.is_stale_test), i.e. tests that appear to have stopped
-- running.
SELECT DISTINCT
  project,
  platform,
  framework,
  test_tier,
  rc_test_name,
  task_group_time
FROM
  release_criteria_v1 AS a
WHERE
  `moz-fx-data-bq-performance.udf.is_stale_test`(task_group_time, test_tier)
  -- only evaluate the most recent task group for each test series
  AND task_group_time = (
    SELECT
      MAX(task_group_time)
    FROM
      release_criteria_v1 AS b
    WHERE
      a.project = b.project
      AND a.platform = b.platform
      AND a.app_name = b.app_name
      AND a.rc_tier = b.rc_tier
      AND a.rc_test_name = b.rc_test_name
  )