Bug 1647328 - Schedule performance release criteria queries (#1596)

This commit is contained in:
Daniel Thorn 2021-03-10 11:27:32 -08:00 committed by GitHub
Parent 201e8b160e
Commit e0e013a20b
No known key found for this signature
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 1062 additions and 0 deletions

Просмотреть файл

@ -117,6 +117,11 @@ SKIP = {
"sql/moz-fx-data-shared-prod/stripe_external/setup_intents_v1/init.sql",
"sql/moz-fx-data-shared-prod/stripe_external/plans_v1/init.sql",
"sql/moz-fx-data-shared-prod/stripe_external/prices_v1/init.sql",
"sql/moz-fx-data-bq-performance/release_criteria/dashboard_health_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/rc_flattened_test_data_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/release_criteria_summary_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/stale_tests_v1/query.sql",
"sql/moz-fx-data-bq-performance/release_criteria/release_criteria_v1/query.sql",
# Already exists (and lacks an "OR REPLACE" clause)
"sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_first_seen_v1/init.sql", # noqa E501
"sql/moz-fx-data-shared-prod/org_mozilla_firefox_derived/clients_last_seen_v1/init.sql", # noqa E501

Просмотреть файл

@ -391,3 +391,14 @@ bqetl_internal_tooling:
retry_delay: 30m
start_date: '2020-06-01'
schedule_interval: 0 4 * * *
# Daily DAG that rebuilds the performance release criteria tables in
# moz-fx-data-bq-performance.release_criteria.
bqetl_release_criteria:
  schedule_interval: daily
  default_args:
    owner: perf-pmo@mozilla.com
    start_date: "2020-12-03"
    email:
      - telemetry-alerts@mozilla.com
      - dthorn@mozilla.com
    retries: 2
    # 30 minutes between retry attempts
    retry_delay: 30m

Просмотреть файл

@ -0,0 +1,137 @@
# Generated via https://github.com/mozilla/bigquery-etl/blob/master/bigquery_etl/query_scheduling/generate_airflow_dags.py
# Airflow DAG that schedules the performance release criteria queries in the
# moz-fx-data-bq-performance.release_criteria dataset. Do not edit by hand;
# regenerate from the metadata.yaml files via the script above.
from airflow import DAG
from airflow.operators.sensors import ExternalTaskSensor
import datetime
from utils.gcp import bigquery_etl_query, gke_command

docs = """
### bqetl_release_criteria
Built from bigquery-etl repo, [`dags/bqetl_release_criteria.py`](https://github.com/mozilla/bigquery-etl/blob/master/dags/bqetl_release_criteria.py)
#### Owner
perf-pmo@mozilla.com
"""

# Defaults applied to every task in this DAG.
default_args = {
    "owner": "perf-pmo@mozilla.com",
    "start_date": datetime.datetime(2020, 12, 3, 0, 0),
    "end_date": None,
    "email": ["telemetry-alerts@mozilla.com", "dthorn@mozilla.com"],
    "depends_on_past": False,
    # 30 minutes between retry attempts
    "retry_delay": datetime.timedelta(seconds=1800),
    "email_on_failure": True,
    "email_on_retry": True,
    "retries": 2,
}

with DAG(
    "bqetl_release_criteria",
    default_args=default_args,
    schedule_interval="@daily",
    doc_md=docs,
) as dag:
    # Reports missing/duplicate/invalid release criteria definitions.
    # date_partition_parameter=None because each query overwrites the whole
    # destination table rather than a single date partition.
    release_criteria__dashboard_health__v1 = bigquery_etl_query(
        task_id="release_criteria__dashboard_health__v1",
        destination_table="dashboard_health_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Flattens raw perfherder data to one row per replicate; the root of the
    # dependency chain below.
    release_criteria__rc_flattened_test_data__v1 = bigquery_etl_query(
        task_id="release_criteria__rc_flattened_test_data__v1",
        destination_table="rc_flattened_test_data_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Aggregates flattened data into per-test release criteria values.
    release_criteria__release_criteria__v1 = bigquery_etl_query(
        task_id="release_criteria__release_criteria__v1",
        destination_table="release_criteria_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Dashboard-facing summary of the latest release criteria results.
    release_criteria__release_criteria_summary__v1 = bigquery_etl_query(
        task_id="release_criteria__release_criteria_summary__v1",
        destination_table="release_criteria_summary_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Reports tests whose latest run is older than allowed for their tier.
    release_criteria__stale_tests__v1 = bigquery_etl_query(
        task_id="release_criteria__stale_tests__v1",
        destination_table="stale_tests_v1",
        dataset_id="release_criteria",
        project_id="moz-fx-data-bq-performance",
        owner="esmyth@mozilla.com",
        email=[
            "dthorn@mozilla.com",
            "esmyth@mozilla.com",
            "telemetry-alerts@mozilla.com",
        ],
        start_date=datetime.datetime(2020, 12, 3, 0, 0),
        date_partition_parameter=None,
        depends_on_past=False,
        dag=dag,
    )

    # Dependency chain: flattened data feeds dashboard_health and
    # release_criteria; release_criteria feeds the summary and stale checks.
    release_criteria__dashboard_health__v1.set_upstream(
        release_criteria__rc_flattened_test_data__v1
    )

    release_criteria__release_criteria__v1.set_upstream(
        release_criteria__rc_flattened_test_data__v1
    )

    release_criteria__release_criteria_summary__v1.set_upstream(
        release_criteria__release_criteria__v1
    )

    release_criteria__stale_tests__v1.set_upstream(
        release_criteria__release_criteria__v1
    )

Просмотреть файл

@ -0,0 +1,17 @@
---
# Metadata for release_criteria.dashboard_health_v1.
description: >
  Firefox performance tests that do not have a defined release criteria or that
  are not explicitly excluded (missing_rc), have multiple release criteria
  defined for a single test (duplicate_rc), or specify the release criteria for
  a test that has not run in at least 28 days (invalid_rc).
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,74 @@
-- Dashboard health report for the performance release criteria.
-- Emits one row per problem found, with a `reason` of:
--   * duplicate_rc - more than one release criteria row is defined for a test
--   * missing_rc   - no criteria matched a test that ran in the last 28 days
--   * invalid_rc   - criteria exist for a test with no run in the last 28 days
WITH recent_tests AS (
  -- Distinct tests observed in the last 28 days of flattened test data.
  SELECT DISTINCT
    task_group_id,
    framework,
    platform,
    -- NULLs are normalized to '' so the FULL OUTER JOIN below can match on
    -- these columns (NULL never compares equal in a join condition).
    IFNULL(normalized_test_type, '') AS test_type,
    test_name,
    IFNULL(test_extra_options, '') AS test_extra_options,
    IFNULL(subtest_name, '') AS subtest_name
  FROM
    rc_flattened_test_data_v1
  WHERE
    task_group_time >= TIMESTAMP_SUB(current_timestamp, INTERVAL 28 DAY)
),
distinct_rc AS (
  -- Release criteria definitions reduced to one row per test key, with a
  -- count of how many criteria were defined for that key (> 1 is an error).
  SELECT
    framework,
    platform,
    IFNULL(test_type, '') AS test_type,
    test_name,
    IFNULL(test_extra_options, '') AS test_extra_options,
    IFNULL(subtest_name, '') AS subtest_name,
    ARRAY_TO_STRING(ARRAY_AGG(DISTINCT rc_test_name), '\n') AS rc_test_name,
    COUNT(*) AS defined_criteria
  FROM
    release_criteria_helper
  GROUP BY
    framework,
    platform,
    test_type,
    test_name,
    test_extra_options,
    subtest_name
)
-- FULL OUTER JOIN so that both unmatched criteria (invalid_rc) and unmatched
-- recent tests (missing_rc) survive into the result.
SELECT DISTINCT
  distinct_rc.rc_test_name,
  COALESCE(distinct_rc.framework, recent_tests.framework) AS framework,
  COALESCE(distinct_rc.platform, recent_tests.platform) AS platform,
  COALESCE(distinct_rc.test_type, recent_tests.test_type) AS test_type,
  COALESCE(distinct_rc.test_name, recent_tests.test_name) AS test_name,
  COALESCE(distinct_rc.test_extra_options, recent_tests.test_extra_options) AS test_extra_options,
  COALESCE(distinct_rc.subtest_name, recent_tests.subtest_name) AS subtest_name,
  CASE
    WHEN
      distinct_rc.defined_criteria > 1
    THEN
      'duplicate_rc'
    WHEN
      distinct_rc.rc_test_name IS NULL
    THEN
      'missing_rc'
    WHEN
      recent_tests.task_group_id IS NULL
    THEN
      'invalid_rc'
  END
  AS reason,
FROM
  distinct_rc
FULL OUTER JOIN
  recent_tests
USING
  (framework, platform, test_name, test_type, test_extra_options, subtest_name)
WHERE
  -- keep only rows that exhibit one of the three problems above
  distinct_rc.rc_test_name IS NULL
  OR distinct_rc.defined_criteria > 1
  OR recent_tests.task_group_id IS NULL
ORDER BY
  rc_test_name,
  framework,
  platform,
  test_type,
  test_name,
  test_extra_options

Просмотреть файл

@ -0,0 +1,14 @@
---
# Metadata for release_criteria.rc_flattened_test_data_v1.
friendly_name: Flattened Performance Test Data
description: Firefox performance test data flattened to 1 row per replicate
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,52 @@
-- Flattens perfherder test runs to one row per subtest replicate, covering
-- the last 52 weeks of '-shippable' builds for the projects and frameworks
-- included by the udf.is_included_* helpers.
SELECT
  test_run.taskGroupId AS task_group_id,
  -- Latest run time within the task group, repeated on every row of the group.
  MAX(test_run.time) OVER (PARTITION BY test_run.taskGroupId) AS task_group_time,
  test_run.platform,
  `moz-fx-data-bq-performance.udf.get_form_factor`(test_run.platform) AS normalized_form_factor,
  `moz-fx-data-bq-performance.udf.get_device_name`(test_run.platform) AS normalized_device_name,
  `moz-fx-data-bq-performance.udf.get_os_name`(test_run.platform) AS normalized_device_os,
  test_run.framework,
  test_run.project,
  test_run.tier AS test_tier,
  test_run.recordingDate AS recording_date,
  test_run.application.name AS app_name,
  test_run.application.version AS app_version,
  `moz-fx-data-bq-performance.udf.get_version_part`(
    test_run.application.version,
    0
  ) AS normalized_app_major_version,
  test_suite.name AS test_name,
  -- todo Use only test_suite.type once https://bugzilla.mozilla.org/show_bug.cgi?id=1645197 lands
  `moz-fx-data-bq-performance.udf.get_normalized_test_type`(
    test_run.framework,
    test_run.type,
    test_suite.type
  ) AS normalized_test_type,
  test_suite.lowerIsBetter AS test_lower_is_better,
  test_suite.value AS test_value,
  test_suite.unit AS test_unit,
  -- Sort the options so equivalent option sets compare equal as strings.
  ARRAY_TO_STRING(
    ARRAY(SELECT x FROM UNNEST(test_suite.extraOptions) AS x ORDER BY x),
    '|'
  ) AS test_extra_options,
  subtest.name AS subtest_name,
  subtest.value AS subtest_value,
  subtest.lowerIsBetter AS subtest_lower_is_better,
  subtest.unit AS subtest_unit,
  subtest_replicate,
  -- Position of the replicate within its subtest; used by release_criteria_v1
  -- to skip the first rc_ignore_first_replicates replicates.
  subtest_replicate_offset,
  test_run.type AS test_type
FROM
  `moz-fx-data-taskclu-prod-8fbf.taskclusteretl.perfherder` AS test_run
-- LEFT JOIN UNNEST keeps runs whose arrays are empty (columns become NULL).
LEFT JOIN
  UNNEST(suites) AS test_suite
LEFT JOIN
  UNNEST(test_suite.subtests) AS subtest
LEFT JOIN
  UNNEST(subtest.replicates) AS subtest_replicate
  WITH OFFSET AS subtest_replicate_offset
WHERE
  test_run.time >= TIMESTAMP_SUB(current_timestamp, INTERVAL 52 * 7 DAY)
  AND `moz-fx-data-bq-performance.udf.is_included_project`(test_run.project)
  AND `moz-fx-data-bq-performance.udf.is_included_framework`(test_run.framework)
  -- shippable builds only
  AND STRPOS(test_run.platform, '-shippable') > 0

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for a release_criteria table scheduled in bqetl_release_criteria.
description: >
  Firefox performance test data related to performance release criteria for
  Desktop and Mobile
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,198 @@
-- Summarizes the most recent release criteria results for the performance
-- dashboard: one row per test with its current value, target comparison, and
-- formatted comparisons against the 1-4 week prior moving averages.
WITH stage_1 AS (
  -- Most recent task group per (project, platform, app_name, rc_tier,
  -- rc_test_name), plus percent change vs each prior-week average.
  SELECT
    project,
    normalized_form_factor,
    normalized_device_name,
    normalized_device_os,
    rc_tier,
    rc_test_name,
    app_name,
    rc_value,
    rc_mean_subtest_stddev,
    test_lower_is_better,
    rc_one_week_prior_average,
    rc_two_week_prior_average,
    rc_three_week_prior_average,
    rc_four_week_prior_average,
    rc_one_week_prior_mean_stddev,
    rc_two_week_prior_mean_stddev,
    rc_three_week_prior_mean_stddev,
    rc_four_week_prior_mean_stddev,
    task_group_time,
    task_group_id,
    test_tier,
    SAFE_DIVIDE(rc_one_week_prior_average - rc_value, rc_value) AS one_week_pct_change,
    SAFE_DIVIDE(rc_two_week_prior_average - rc_value, rc_value) AS two_week_pct_change,
    SAFE_DIVIDE(rc_three_week_prior_average - rc_value, rc_value) AS three_week_pct_change,
    SAFE_DIVIDE(rc_four_week_prior_average - rc_value, rc_value) AS four_week_pct_change,
  FROM
    release_criteria_v1 AS a
  WHERE
    -- keep only the latest task group for each test series
    task_group_time = (
      SELECT
        MAX(task_group_time)
      FROM
        release_criteria_v1 AS b
      WHERE
        a.project = b.project
        AND a.platform = b.platform
        AND a.app_name = b.app_name
        AND a.rc_tier = b.rc_tier
        AND a.rc_test_name = b.rc_test_name
    )
),
stage_2 AS (
  -- Join each real result to its synthetic 'target*' row (if any) and run the
  -- significance UDF against the target and each prior-week average.
  SELECT
    a.*,
    b.rc_value AS target_value,
    b.rc_mean_subtest_stddev AS target_stddev,
    SAFE_DIVIDE(b.rc_value - a.rc_value, a.rc_value) AS target_pct_diff,
    -- Array order matters: OFFSET 0 = target, 1-4 = one..four weeks prior;
    -- stage_4 unpacks the array positionally in that order.
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.is_change_significant(
          a.rc_value,
          a.rc_mean_subtest_stddev,
          mean_old,
          stddev_old
        )
      FROM
        UNNEST(
          ARRAY<STRUCT<mean_old FLOAT64, stddev_old FLOAT64>>[
            (b.rc_value, b.rc_mean_subtest_stddev),
            (a.rc_one_week_prior_average, a.rc_one_week_prior_mean_stddev),
            (a.rc_two_week_prior_average, a.rc_two_week_prior_mean_stddev),
            (a.rc_three_week_prior_average, a.rc_three_week_prior_mean_stddev),
            (a.rc_four_week_prior_average, a.rc_four_week_prior_mean_stddev)
          ]
        )
    ) AS change_significant,
  FROM
    stage_1 AS a
  LEFT JOIN
    (
      -- synthetic target rows produced by release_criteria_v1
      SELECT DISTINCT
        task_group_id,
        project,
        normalized_device_name,
        normalized_device_os,
        rc_tier,
        rc_test_name,
        rc_value,
        rc_mean_subtest_stddev,
      FROM
        stage_1
      WHERE
        STARTS_WITH(app_name, 'target')
    ) AS b
  USING
    (task_group_id, project, normalized_device_name, normalized_device_os, rc_tier, rc_test_name)
  WHERE
    NOT STARTS_WITH(app_name, 'target')
),
stage_3 AS (
  -- Interpret each significance result as regression/improvement/unchanged,
  -- taking the test's direction (test_lower_is_better) into account.
  SELECT
    *,
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.interpret_change(direction, test_lower_is_better)
      FROM
        UNNEST(change_significant)
    ) AS interpreted_change,
  FROM
    stage_2
),
stage_4 AS (
  -- Unpack the comparison arrays into named columns and format for display.
  SELECT
    *,
    change_significant[OFFSET(0)] AS is_target_diff_significant,
    change_significant[OFFSET(1)] AS is_one_week_change_significant,
    change_significant[OFFSET(2)] AS is_two_week_change_significant,
    change_significant[OFFSET(3)] AS is_three_week_change_significant,
    change_significant[OFFSET(4)] AS is_four_week_change_significant,
    interpreted_change[OFFSET(0)] AS vs_target,
    interpreted_change[OFFSET(1)] AS vs_one_week_prior,
    interpreted_change[OFFSET(2)] AS vs_two_week_prior,
    interpreted_change[OFFSET(3)] AS vs_three_week_prior,
    interpreted_change[OFFSET(4)] AS vs_four_week_prior,
    ARRAY(
      SELECT
        `moz-fx-data-bq-performance`.udf.get_formatted_comparison(
          value,
          pct,
          _interpreted_change,
          confidence
        )
      FROM
        UNNEST(
          ARRAY<STRUCT<value FLOAT64, pct FLOAT64, _interpreted_change STRING, confidence FLOAT64>>[
            (
              target_value,
              target_pct_diff,
              interpreted_change[OFFSET(0)],
              change_significant[OFFSET(0)].confidence
            ),
            (
              rc_one_week_prior_average,
              one_week_pct_change,
              interpreted_change[OFFSET(1)],
              change_significant[OFFSET(1)].confidence
            ),
            (
              rc_two_week_prior_average,
              two_week_pct_change,
              interpreted_change[OFFSET(2)],
              change_significant[OFFSET(2)].confidence
            ),
            (
              rc_three_week_prior_average,
              three_week_pct_change,
              interpreted_change[OFFSET(3)],
              change_significant[OFFSET(3)].confidence
            ),
            (
              rc_four_week_prior_average,
              four_week_pct_change,
              interpreted_change[OFFSET(4)],
              change_significant[OFFSET(4)].confidence
            )
          ]
        )
    ) AS formatted_comparison,
  FROM
    stage_3
)
SELECT
  project,
  normalized_form_factor,
  normalized_device_name,
  normalized_device_os,
  rc_tier,
  rc_test_name,
  app_name,
  ROUND(rc_value, 2) AS `current`,
  CONCAT(
    DATE(task_group_time),
    -- NOTE(review): both IF branches are empty strings; a stale-test marker
    -- (e.g. a warning glyph) appears to have been lost in transit -- confirm
    -- the intended marker against the upstream repo.
    IF(`moz-fx-data-bq-performance.udf.is_stale_test`(task_group_time, test_tier), '', '')
  ) AS last_updated,
  formatted_comparison[OFFSET(0)] AS target,
  formatted_comparison[OFFSET(1)] AS one_week_prior,
  formatted_comparison[OFFSET(2)] AS two_week_prior,
  formatted_comparison[OFFSET(3)] AS three_week_prior,
  formatted_comparison[OFFSET(4)] AS four_week_prior,
  -- Fixed: the *_unchanged columns previously tested 'improvement' and the
  -- *_improvement columns tested 'unchanged'; each condition now matches its
  -- column name. Column names (the external interface) are unchanged.
  IF(vs_target = 'regression', rc_test_name, NULL) AS vs_target_regression,
  IF(vs_target = 'unchanged', rc_test_name, NULL) AS vs_target_unchanged,
  IF(vs_target = 'improvement', rc_test_name, NULL) AS vs_target_improvement,
  IF(vs_one_week_prior = 'regression', rc_test_name, NULL) AS vs_one_week_prior_regression,
  IF(vs_one_week_prior = 'unchanged', rc_test_name, NULL) AS vs_one_week_prior_unchanged,
  IF(vs_one_week_prior = 'improvement', rc_test_name, NULL) AS vs_one_week_prior_improvement,
  IF(vs_two_week_prior = 'regression', rc_test_name, NULL) AS vs_two_week_prior_regression,
  IF(vs_two_week_prior = 'unchanged', rc_test_name, NULL) AS vs_two_week_prior_unchanged,
  IF(vs_two_week_prior = 'improvement', rc_test_name, NULL) AS vs_two_week_prior_improvement,
  IF(vs_three_week_prior = 'regression', rc_test_name, NULL) AS vs_three_week_prior_regression,
  IF(vs_three_week_prior = 'unchanged', rc_test_name, NULL) AS vs_three_week_prior_unchanged,
  IF(vs_three_week_prior = 'improvement', rc_test_name, NULL) AS vs_three_week_prior_improvement,
  IF(vs_four_week_prior = 'regression', rc_test_name, NULL) AS vs_four_week_prior_regression,
  IF(vs_four_week_prior = 'unchanged', rc_test_name, NULL) AS vs_four_week_prior_unchanged,
  IF(vs_four_week_prior = 'improvement', rc_test_name, NULL) AS vs_four_week_prior_improvement,
FROM
  stage_4

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for a release_criteria table scheduled in bqetl_release_criteria.
description: >
  Firefox performance test data related to performance release criteria for
  Desktop and Mobile
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,486 @@
-- Builds release_criteria_v1: aggregates flattened performance test data up
-- the hierarchy replicate -> subtest -> test -> release-criteria test, adds
-- synthetic target rows, the tested Firefox version, and 1-4 week prior
-- moving averages per test series.
WITH rc_included_tests AS (
  -- Filter and correct table that defines the aggregation for release criteria
  SELECT
    * REPLACE (
      IFNULL(rc_tier, 5) AS rc_tier,
      IFNULL(rc_test_aggregator, 'use_existing') AS rc_test_aggregator,
      IFNULL(rc_subtest_aggregator, 'use_existing') AS rc_subtest_aggregator,
      IFNULL(rc_replicate_aggregator, 'use_existing') AS rc_replicate_aggregator,
      IFNULL(rc_ignore_first_replicates, 0) AS rc_ignore_first_replicates,
      IFNULL(rc_target_type, 'none') AS rc_target_type,
      IFNULL(test_extra_options, '') AS test_extra_options
    )
  FROM
    release_criteria_helper
  WHERE
    rc_test_name IS NOT NULL
    AND rc_test_name != 'exclude'
),
rc_test_data AS (
  -- Join the flattened test data with the table that defines the aggregation
  -- for release criteria.
  SELECT
    flattened.* EXCEPT (subtest_replicate_offset, test_type),
    -- Single string key identifying one release-criteria aggregation group.
    CONCAT(
      flattened.task_group_id,
      flattened.project,
      flattened.platform,
      COALESCE(flattened.app_name, ''),
      included_rc.rc_tier,
      included_rc.rc_test_name
    ) AS rc_test_aggregation_key,
    included_rc.rc_test_name,
    included_rc.rc_tier,
    included_rc.rc_test_aggregator,
    included_rc.rc_subtest_aggregator,
    included_rc.rc_replicate_aggregator,
    included_rc.rc_ignore_first_replicates,
    included_rc.rc_target_type,
    included_rc.rc_target_value,
    included_rc.rc_target_app,
  FROM
    rc_flattened_test_data_v1 AS flattened
  LEFT JOIN
    rc_included_tests AS included_rc
  ON
    flattened.framework = included_rc.framework
    AND flattened.platform = included_rc.platform
    AND flattened.normalized_test_type = included_rc.test_type
    AND flattened.test_name = included_rc.test_name
    AND flattened.test_extra_options = included_rc.test_extra_options
    AND flattened.subtest_name = included_rc.subtest_name
  WHERE
    -- Skip the first N (warm-up) replicates. NOTE(review): this predicate is
    -- NULL for rows with no matching release criteria, so those rows are
    -- dropped and the LEFT JOIN effectively becomes INNER -- presumably
    -- intended (only tests with criteria are aggregated); confirm.
    rc_ignore_first_replicates < (subtest_replicate_offset + 1)
),
kurtosis_parts AS (
  -- Aggregate replicates into one value per subtest (per the configured
  -- rc_replicate_aggregator) and accumulate the power sums needed for the
  -- skewness/kurtosis UDFs.
  SELECT
    * EXCEPT (subtest_replicate),
    CASE
      rc_replicate_aggregator
      WHEN 'use_existing' THEN subtest_value
      WHEN 'mean' THEN AVG(subtest_replicate) OVER (replicate_window)
      WHEN 'median' THEN PERCENTILE_CONT(subtest_replicate, 0.5) OVER (replicate_window)
      -- Adding 1 to the replicate value then subtracting 1 from the result
      -- ensures that a value is always returned. Maintained to be consistent
      -- with how the perf test framework calculates geomean:
      -- https://searchfox.org/mozilla-central/source/testing/talos/talos/filter.py#174
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_replicate_aggregator = 'geomean', subtest_replicate + 1, NULL))) OVER (
          replicate_window
        )
      ) - 1
      WHEN 'max' THEN MAX(subtest_replicate) OVER (replicate_window)
      WHEN 'min' THEN MIN(subtest_replicate) OVER (replicate_window)
      WHEN 'sum' THEN SUM(subtest_replicate) OVER (replicate_window)
      ELSE ERROR(CONCAT('Unknown replicate aggregator: ', rc_replicate_aggregator))
    END AS rc_subtest_value,
    AVG(subtest_replicate) OVER (replicate_window) AS rc_replicate_mean,
    STDDEV_SAMP(subtest_replicate) OVER (replicate_window) AS rc_replicate_stddev,
    COUNT(subtest_replicate) OVER (replicate_window) AS rc_replicate_count,
    TO_JSON_STRING(ARRAY_AGG(subtest_replicate) OVER (replicate_window)) AS replicate_values_json,
    -- Power sums consumed by udf.skewness / udf.kurtosis below.
    SUM(1.0 * subtest_replicate) OVER (replicate_window) AS rx,
    SUM(POWER(1.0 * subtest_replicate, 2)) OVER (replicate_window) AS rx2,
    SUM(POWER(1.0 * subtest_replicate, 3)) OVER (replicate_window) AS rx3,
    SUM(POWER(1.0 * subtest_replicate, 4)) OVER (replicate_window) AS rx4,
  FROM
    rc_test_data
  WINDOW
    replicate_window AS (
      PARTITION BY
        rc_test_aggregation_key,
        test_name,
        subtest_name
    )
),
subtests AS (
  -- Aggregate the in-scope replicates and calculate summary statistics
  SELECT DISTINCT
    * EXCEPT (rx, rx2, rx3, rx4),
    `moz-fx-data-bq-performance.udf.skewness`(
      rc_replicate_count,
      rc_replicate_mean,
      rc_replicate_stddev,
      rx,
      rx2,
      rx3
    ) AS rc_replicate_skewness,
    `moz-fx-data-bq-performance.udf.kurtosis`(
      rc_replicate_count,
      rc_replicate_mean,
      rc_replicate_stddev,
      rx,
      rx2,
      rx3,
      rx4
    ) AS rc_replicate_kurtosis,
  FROM
    kurtosis_parts
),
tests AS (
  -- Aggregate the in-scope sub tests and calculate the stddev and count
  SELECT DISTINCT
    * EXCEPT (
      rc_subtest_value,
      rc_replicate_mean,
      rc_replicate_stddev,
      rc_replicate_count,
      test_tier,
      subtest_name,
      subtest_value,
      subtest_lower_is_better,
      subtest_unit
    ),
    CASE
      rc_subtest_aggregator
      WHEN 'use_existing' THEN rc_subtest_value
      WHEN 'mean' THEN AVG(rc_subtest_value) OVER (subtest_window)
      WHEN 'median' THEN PERCENTILE_CONT(rc_subtest_value, 0.5) OVER (subtest_window)
      -- geomean: see the note in kurtosis_parts above.
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_subtest_aggregator = 'geomean', rc_subtest_value + 1, NULL))) OVER (
          subtest_window
        )
      ) - 1
      WHEN 'max' THEN MAX(rc_subtest_value) OVER (subtest_window)
      WHEN 'min' THEN MIN(rc_subtest_value) OVER (subtest_window)
      WHEN 'sum' THEN SUM(rc_subtest_value) OVER (subtest_window)
      ELSE ERROR(CONCAT('Unknown subtest aggregator: ', rc_subtest_aggregator))
    END AS rc_test_value,
    AVG(rc_replicate_mean) OVER (subtest_window) AS rc_subtest_mean,
    SQRT(
      SUM(POWER(rc_replicate_stddev, 2)) OVER (subtest_window)
    ) AS rc_propagated_error,  -- Propagate error through aggregation
    MAX(rc_replicate_stddev) OVER (subtest_window) AS rc_max_subtest_stddev,
    AVG(rc_replicate_stddev) OVER (subtest_window) AS rc_mean_subtest_stddev,
    COUNT(DISTINCT subtest_name) OVER (subtest_window) AS rc_subtest_count,
    -- AVG(rc_replicate_count) OVER(subtest_window) AS rc_subtest_avg_replicates,
    -- Would prefer to just save the array as a JSON string but trying to use
    -- `ARRAY_CONCAT_AGG` in the next stage produces an unsupported error
    ARRAY_TO_STRING(
      ARRAY_AGG(CAST(rc_replicate_count AS STRING)) OVER (subtest_window),
      ','
    ) AS rc_subtest_replicate_counts,
    MIN(test_tier) OVER (subtest_window) AS test_tier,
  FROM
    subtests
  WINDOW
    subtest_window AS (
      PARTITION BY
        rc_test_aggregation_key,
        test_name
    )
),
rc_tests AS (
  -- Aggregate the in-scope tests into one release-criteria value per
  -- aggregation key and calculate the stddev and count.
  SELECT DISTINCT
    * EXCEPT (
      rc_test_value,
      rc_subtest_mean,
      rc_propagated_error,
      rc_max_subtest_stddev,
      rc_mean_subtest_stddev,
      rc_subtest_count,
      rc_subtest_replicate_counts,
      test_name,
      test_value,
      test_tier
    ),
    -- Fixed: this level aggregates tests, so the CASE switches on
    -- rc_test_aggregator. The original switched on rc_subtest_aggregator,
    -- which contradicted the rc_test_aggregator guard inside the geomean
    -- branch below and applied the wrong aggregator whenever the two differ.
    CASE
      rc_test_aggregator
      WHEN 'use_existing' THEN rc_test_value
      WHEN 'mean' THEN AVG(rc_test_value) OVER (test_window)
      WHEN 'median' THEN PERCENTILE_CONT(rc_test_value, 0.5) OVER (test_window)
      -- geomean: see the note in kurtosis_parts above.
      WHEN 'geomean' THEN EXP(
        AVG(LN(IF(rc_test_aggregator = 'geomean', rc_test_value + 1, NULL))) OVER (test_window)
      ) - 1
      WHEN 'max' THEN MAX(rc_test_value) OVER (test_window)
      WHEN 'min' THEN MIN(rc_test_value) OVER (test_window)
      WHEN 'sum' THEN SUM(rc_test_value) OVER (test_window)
      ELSE ERROR(CONCAT('Unknown test aggregator: ', rc_test_aggregator))
    END AS rc_value,
    AVG(rc_subtest_mean) OVER (test_window) AS rc_test_mean,
    SQRT(
      SUM(POWER(rc_propagated_error, 2)) OVER (test_window)
    ) AS rc_propagated_error,  -- Propagate error through aggregation
    MAX(rc_max_subtest_stddev) OVER (test_window) AS rc_max_subtest_stddev,
    AVG(rc_mean_subtest_stddev) OVER (test_window) AS rc_mean_subtest_stddev,
    COUNT(DISTINCT test_name) OVER (test_window) AS rc_test_count,
    AVG(rc_subtest_count) OVER (test_window) AS rc_test_avg_subtests,
    ARRAY_TO_STRING(
      ARRAY_AGG(CAST(rc_subtest_replicate_counts AS STRING)) OVER (test_window),
      ','
    ) AS rc_test_replicate_counts,
    MIN(test_tier) OVER (test_window) AS test_tier,
  FROM
    tests
  WINDOW
    test_window AS (
      PARTITION BY
        rc_test_aggregation_key
    )
),
versions AS (
  -- Max major version per (date, release channel category) from the release
  -- calendar.
  SELECT DISTINCT
    date_utc,
    category,
    MAX(`moz-fx-data-bq-performance`.udf.get_version_part(version, 0)) AS major_version,
  FROM
    `dp2-prod`.sumo.release_calendar
  GROUP BY
    date_utc,
    category
),
rc_tests_with_tested_version AS (
  -- Order the columns and reduce to the distinct aggregated tests (quality
  -- control on calculations is possible on aggregation temporary tables)
  SELECT DISTINCT
    task_group_time,
    platform,
    normalized_form_factor,
    normalized_device_name,
    normalized_device_os,
    framework,
    project,
    rc_tier,
    recording_date,
    app_name,
    app_version,
    normalized_app_major_version,
    rc_test_name,
    normalized_test_type,
    rc_value,
    rc_propagated_error,
    rc_max_subtest_stddev,
    rc_mean_subtest_stddev,
    rc_test_count,
    rc_test_avg_subtests,
    TO_JSON_STRING(
      (
        SELECT
          APPROX_QUANTILES(CAST(x AS INT64), 3)
        FROM
          UNNEST(SPLIT(rc_test_replicate_counts, ',')) AS x
      )
    ) AS replicate_quantiles,
    rc_target_type,
    rc_target_value,
    rc_target_app,
    test_lower_is_better,
    test_unit,
    -- Trunk/nightly projects ship as the next major version.
    IF(
      project IN ('fenix', 'mozilla-central', 'autoland'),
      major_version + 1,
      major_version
    ) AS firefox_version,
    rc_test_aggregation_key,
    task_group_id,
    test_tier
  FROM
    rc_tests
  LEFT JOIN
    versions
  ON
    date_utc = DATE(task_group_time)
    AND (
      (project IN ('fenix', 'mozilla-central', 'mozilla-beta', 'autoland') AND category = 'dev')
      OR (project = 'mozilla-release' AND category = 'stability')
      OR (project = 'mozilla-esr68' AND category = 'esr')
    )
  WHERE
    rc_value IS NOT NULL
),
build_targets AS (
  -- Builds the results rows for tests with a relative or absolute target value
  SELECT
    * REPLACE (
      CASE
        WHEN rc_target_type = 'absolute' THEN 'target'
        WHEN rc_target_type = 'relative' AND rc_target_app IS NOT NULL THEN
          CONCAT('target (vs ', rc_target_app, ')')
        WHEN rc_target_type = 'relative' AND rc_target_app IS NULL THEN
          -- misconfiguration: fail the query loudly rather than emit bad rows
          ERROR(
            CONCAT(
              'Relative target with null rc_target_app: ',
              platform,
              framework,
              project,
              rc_test_name
            )
          )
      END AS app_name,
      CASE
        WHEN rc_target_type = 'absolute' THEN rc_target_value
        WHEN rc_target_type = 'relative' THEN rc_target_value * rc_value
      END AS rc_value
    )
  FROM
    rc_tests_with_tested_version
  WHERE
    rc_target_type = 'absolute'
    OR (rc_target_type = 'relative' AND rc_target_app = app_name)
),
rc_tests_with_targets AS (
  -- Real results plus the synthetic 'target*' rows.
  SELECT
    *
  FROM
    rc_tests_with_tested_version
  UNION ALL
  SELECT
    *
  FROM
    build_targets
)
SELECT DISTINCT
  *,
  -- Day-based RANGE frames: each "N week prior" window covers days
  -- 7*N..7*N+6 before the current row within its test series.
  AVG(rc_value) OVER (one_week_window) AS rc_one_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (one_week_window)
  ) AS rc_one_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (one_week_window) AS rc_one_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (one_week_window) AS rc_one_week_prior_mean_stddev,
  COUNT(rc_value) OVER (one_week_window) AS rc_one_week_prior_count,
  AVG(rc_value) OVER (two_week_window) AS rc_two_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (two_week_window)
  ) AS rc_two_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (two_week_window) AS rc_two_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (two_week_window) AS rc_two_week_prior_mean_stddev,
  COUNT(rc_value) OVER (two_week_window) AS rc_two_week_prior_count,
  AVG(rc_value) OVER (three_week_window) AS rc_three_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (three_week_window)
  ) AS rc_three_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (three_week_window) AS rc_three_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (three_week_window) AS rc_three_week_prior_mean_stddev,
  COUNT(rc_value) OVER (three_week_window) AS rc_three_week_prior_count,
  AVG(rc_value) OVER (four_week_window) AS rc_four_week_prior_average,
  SQRT(
    SUM(POWER(rc_propagated_error, 2)) OVER (four_week_window)
  ) AS rc_four_week_prior_propagated_error,
  MAX(rc_max_subtest_stddev) OVER (four_week_window) AS rc_four_week_prior_max_stddev,
  AVG(rc_mean_subtest_stddev) OVER (four_week_window) AS rc_four_week_prior_mean_stddev,
  COUNT(rc_value) OVER (four_week_window) AS rc_four_week_prior_count,
FROM
  rc_tests_with_targets
WINDOW
  moving_average_window AS (
    PARTITION BY
      project,
      platform,
      rc_tier,
      rc_test_name,
      app_name
    ORDER BY
      UNIX_DATE(DATE(task_group_time)) ASC
  ),
  one_week_window AS (
    moving_average_window
    RANGE BETWEEN 13 PRECEDING AND 7 PRECEDING
  ),
  two_week_window AS (
    moving_average_window
    RANGE BETWEEN 20 PRECEDING AND 14 PRECEDING
  ),
  three_week_window AS (
    moving_average_window
    RANGE BETWEEN 27 PRECEDING AND 21 PRECEDING
  ),
  four_week_window AS (
    moving_average_window
    RANGE BETWEEN 34 PRECEDING AND 28 PRECEDING
  )
ORDER BY
  task_group_time DESC,
  rc_test_aggregation_key

Просмотреть файл

@ -0,0 +1,15 @@
---
# Metadata for release_criteria.stale_tests_v1.
description: >
  Firefox performance tests that have not run as recently as expected by their
  test tier
labels:
  application: firefox
  schedule: daily
owners:
  - esmyth@mozilla.com
scheduling:
  dag_name: bqetl_release_criteria
  start_date: "2020-12-03"
  # destination is the whole table, not a single partition,
  # so don't use date_partition_parameter
  date_partition_parameter: null

Просмотреть файл

@ -0,0 +1,23 @@
-- Performance tests whose most recent run is older than allowed for their
-- test tier (per udf.is_stale_test), i.e. tests that appear to have stopped
-- running.
SELECT DISTINCT
  project,
  platform,
  framework,
  test_tier,
  rc_test_name,
  task_group_time
FROM
  release_criteria_v1 AS a
WHERE
  `moz-fx-data-bq-performance.udf.is_stale_test`(task_group_time, test_tier)
  -- only evaluate the most recent task group for each test series
  AND task_group_time = (
    SELECT
      MAX(task_group_time)
    FROM
      release_criteria_v1 AS b
    WHERE
      a.project = b.project
      AND a.platform = b.platform
      AND a.app_name = b.app_name
      AND a.rc_tier = b.rc_tier
      AND a.rc_test_name = b.rc_test_name
  )