Backed out 5 changesets (bug 1548845) for failing new youtube playback raptor tests. CLOSED TREE

Backed out changeset 934d2f88195d (bug 1548845)
Backed out changeset 609f489bdc8c (bug 1548845)
Backed out changeset a2544ca8c593 (bug 1548845)
Backed out changeset 152615db9db6 (bug 1548845)
Backed out changeset 6b3a8394727f (bug 1548845)

--HG--
rename : testing/raptor/raptor/filters.py => testing/raptor/raptor/filter.py
Mihai Alexandru Michis 2019-05-17 16:17:06 +03:00
Parent 90ec1a4ed1
Commit 724354e338
10 changed files: 63 additions and 205 deletions

View file

@@ -1579,19 +1579,6 @@ raptor-wasm-godot-ion-firefox-profiling:
- --test=raptor-wasm-godot-ion
- --gecko-profile
raptor-youtube-playback-firefox:
description: "Raptor YouTube Playback on Firefox"
try-name: raptor-youtube-playback-firefox
treeherder-symbol: Rap(ytp)
max-run-time:
by-test-platform:
windows10-aarch64/opt: 3600
default: 2700
tier: 2
mozharness:
extra-options:
- --test=raptor-youtube-playback
raptor-tp6-1-firefox-cold:
description: "Raptor tp6-1 cold page-load on Firefox"
try-name: raptor-tp6-1-firefox-cold

View file

@@ -100,7 +100,6 @@ raptor-firefox:
- raptor-webaudio-firefox
- raptor-sunspider-firefox
- raptor-wasm-godot-firefox
- raptor-youtube-playback-firefox
- raptor-tp6-1-firefox-cold
- raptor-tp6-2-firefox-cold
- raptor-tp6-3-firefox-cold

View file

@@ -16,10 +16,10 @@ Each filter is a simple function, but it also has attached a special
`prepare` method that creates a tuple with one instance of a
:class:`Filter`; this allows one to write stuff like::
from raptor import filters
filter_list = filters.ignore_first.prepare(1) + filters.median.prepare()
from raptor import filter
filters = filter.ignore_first.prepare(1) + filter.median.prepare()
for filter in filter_list:
for filter in filters:
data = filter(data)
# data is filtered
"""

View file

@@ -15,8 +15,6 @@ raptor_ini = os.path.join(here, 'raptor.ini')
tests_dir = os.path.join(here, 'tests')
LOG = get_proxy_logger(component="raptor-manifest")
LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2
required_settings = [
'alert_threshold',
'apps',
@@ -166,9 +164,6 @@ def write_test_settings_json(args, test_details, oskey):
test_settings['raptor-options']['subtest_lower_is_better'] = bool_from_str(
subtest_lower_is_better)
if test_details.get("alert_change_type", None) is not None:
test_settings['raptor-options']['alert_change_type'] = test_details['alert_change_type']
if test_details.get("alert_threshold", None) is not None:
test_settings['raptor-options']['alert_threshold'] = float(test_details['alert_threshold'])
@@ -332,10 +327,8 @@ def get_raptor_test_list(args, oskey):
next_test['playback'] = None
LOG.info("using live sites so appending '-live' to the test name")
next_test['name'] = next_test['name'] + "-live"
# allow a slightly higher page timeout due to remote page loads
next_test['page_timeout'] = int(
next_test['page_timeout']) * LIVE_SITE_TIMEOUT_MULTIPLIER
LOG.info("using live sites so using page timeout of %dms" % next_test['page_timeout'])
# we also want to increase the page timeout since live pages may load more slowly
next_test['page_timeout'] = 180000
# convert 'measure =' test INI line to list
if next_test.get('measure') is not None:
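The hunk above swaps the backed-out proportional timeout for the restored hard-coded 180000 ms. A rough sketch of the arithmetic being removed, with an illustrative base value (not taken from the source):

    LIVE_SITE_TIMEOUT_MULTIPLIER = 1.2

    page_timeout = 150000  # ms; hypothetical per-test base timeout
    # The backed-out code scaled the base timeout for remote page loads:
    scaled = int(page_timeout) * LIVE_SITE_TIMEOUT_MULTIPLIER  # 180000.0 ms
    # The restored code instead pins a fixed value for live sites:
    page_timeout = 180000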

View file

@@ -8,7 +8,7 @@
"""output raptor test results"""
from __future__ import absolute_import
import filters
import filter
import json
import os
@@ -60,10 +60,6 @@ class Output(object):
'alertThreshold': float(test.alert_threshold)
}
# Check if optional properties have been set by the test
if hasattr(test, "alert_change_type"):
suite['alertChangeType'] = test.alert_change_type
# if cold load add that info to the suite result dict; this will be used later
# when combining the results from multiple browser cycles into one overall result
if test.cold is True:
@@ -101,7 +97,7 @@ class Output(object):
# for warm page-load, ignore first value due to 1st pageload noise
LOG.info("ignoring the first %s value due to initial pageload noise"
% measurement_name)
filtered_values = filters.ignore_first(new_subtest['replicates'], 1)
filtered_values = filter.ignore_first(new_subtest['replicates'], 1)
else:
# for cold-load we want all the values
filtered_values = new_subtest['replicates']
@@ -111,7 +107,7 @@ class Output(object):
# cases where TTFI is not available, which is acceptable; however we don't want
# to include those '-1' TTFI values in our final results calculations
if measurement_name == "ttfi":
filtered_values = filters.ignore_negative(filtered_values)
filtered_values = filter.ignore_negative(filtered_values)
# we've already removed the first pageload value; if there aren't any more
# valid TTFI values available for this pageload just remove it from results
if len(filtered_values) < 1:
@@ -125,30 +121,28 @@ class Output(object):
% measurement_name)
new_subtest['shouldAlert'] = True
new_subtest['value'] = filters.median(filtered_values)
new_subtest['value'] = filter.median(filtered_values)
vals.append([new_subtest['value'], new_subtest['name']])
subtests.append(new_subtest)
elif test.type == "benchmark":
if 'assorted-dom' in test.measurements:
subtests, vals = self.parseAssortedDomOutput(test)
if 'speedometer' in test.measurements:
subtests, vals = self.parseSpeedometerOutput(test)
elif 'motionmark' in test.measurements:
subtests, vals = self.parseMotionmarkOutput(test)
elif 'speedometer' in test.measurements:
subtests, vals = self.parseSpeedometerOutput(test)
elif 'sunspider' in test.measurements:
subtests, vals = self.parseSunspiderOutput(test)
elif 'unity-webgl' in test.measurements:
subtests, vals = self.parseUnityWebGLOutput(test)
elif 'wasm-godot' in test.measurements:
subtests, vals = self.parseWASMGodotOutput(test)
elif 'wasm-misc' in test.measurements:
subtests, vals = self.parseWASMMiscOutput(test)
elif 'webaudio' in test.measurements:
subtests, vals = self.parseWebaudioOutput(test)
elif 'youtube-playbackperf-test' in test.measurements:
subtests, vals = self.parseYoutubePlaybackPerformanceOutput(test)
elif 'unity-webgl' in test.measurements:
subtests, vals = self.parseUnityWebGLOutput(test)
elif 'assorted-dom' in test.measurements:
subtests, vals = self.parseAssortedDomOutput(test)
elif 'wasm-misc' in test.measurements:
subtests, vals = self.parseWASMMiscOutput(test)
elif 'wasm-godot' in test.measurements:
subtests, vals = self.parseWASMGodotOutput(test)
suite['subtests'] = subtests
else:
@@ -274,7 +268,7 @@ class Output(object):
vals = []
for next_sub in combined_suites[name]['subtests']:
# calculate sub-test results (i.e. each measurement type)
next_sub['value'] = filters.median(next_sub['replicates'])
next_sub['value'] = filter.median(next_sub['replicates'])
# add to vals; vals is used to calculate overall suite result i.e. the
# geomean of all of the subtests / measurement types
vals.append([next_sub['value'], next_sub['name']])
@@ -406,7 +400,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -443,7 +437,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -482,7 +476,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -529,7 +523,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -584,7 +578,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -611,7 +605,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.mean(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.mean(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -656,7 +650,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = filters.median(_subtests[name]['replicates'])
_subtests[name]['value'] = filter.median(_subtests[name]['replicates'])
subtests.append(_subtests[name])
vals.append([_subtests[name]['value'], name])
@@ -695,7 +689,7 @@ class Output(object):
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = round(filters.median(_subtests[name]['replicates']), 2)
_subtests[name]['value'] = round(filter.median(_subtests[name]['replicates']), 2)
subtests.append(_subtests[name])
# only use the 'total's to compute the overall result
if name == 'total':
@@ -703,72 +697,6 @@ class Output(object):
return subtests, vals
def parseYoutubePlaybackPerformanceOutput(self, test):
"""Parse the metrics for the Youtube playback performance test.
For each video measured values for dropped and decoded frames will be
available from the benchmark site.
{u'PlaybackPerf.VP9.2160p60@2X': {u'droppedFrames': 1, u'decodedFrames': 796}
With each page cycle / iteration of the test multiple values can be present.
Raptor will calculate the percentage of dropped frames to decoded frames.
All those three values will then be emitted as separate sub tests.
"""
_subtests = {}
data = test.measurements['youtube-playbackperf-test']
def create_subtest_entry(name, value,
unit=test.subtest_unit,
lower_is_better=test.subtest_lower_is_better):
# build a list of subtests and append all related replicates
if name not in _subtests.keys():
# subtest not added yet, first pagecycle, so add new one
_subtests[name] = {
'name': name,
'unit': unit,
'lowerIsBetter': lower_is_better,
'replicates': [],
}
_subtests[name]['replicates'].append(value)
for pagecycle in data:
for _sub, _value in pagecycle[0].iteritems():
try:
percent_dropped = float(_value['droppedFrames']) / _value['decodedFrames']
except ZeroDivisionError:
# if no frames have been decoded the playback failed completely
percent_dropped = 1
# Remove the not needed "PlaybackPerf." prefix from each test
_sub = _sub.split('PlaybackPerf.', 1)[-1]
# build a list of subtests and append all related replicates
create_subtest_entry("{}_decoded_frames".format(_sub),
_value['decodedFrames'],
lower_is_better=False,
)
create_subtest_entry("{}_dropped_frames".format(_sub),
_value['droppedFrames'],
)
create_subtest_entry("{}_%_dropped_frames".format(_sub),
percent_dropped,
)
vals = []
subtests = []
names = _subtests.keys()
names.sort(reverse=True)
for name in names:
_subtests[name]['value'] = round(filters.median(_subtests[name]['replicates']), 2)
subtests.append(_subtests[name])
if name.endswith("dropped_frames"):
vals.append([_subtests[name]['value'], name])
return subtests, vals
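To make the parser above concrete, here is one hypothetical page cycle worked through by hand; the measurement name and frame counts come from the docstring's example, and the derived numbers are illustrative:

    pagecycle = [{u'PlaybackPerf.VP9.2160p60@2X':
                  {u'droppedFrames': 1, u'decodedFrames': 796}}]

    value = pagecycle[0][u'PlaybackPerf.VP9.2160p60@2X']
    percent_dropped = float(value['droppedFrames']) / value['decodedFrames']
    # Three subtests are emitted (the 'PlaybackPerf.' prefix is stripped):
    #   VP9.2160p60@2X_decoded_frames   -> 796  (lower_is_better=False)
    #   VP9.2160p60@2X_dropped_frames   -> 1
    #   VP9.2160p60@2X_%_dropped_frames -> ~0.0013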
def summarize_screenshots(self, screenshots):
if len(screenshots) == 0:
return
@@ -898,7 +826,7 @@ class Output(object):
@classmethod
def v8_Metric(cls, val_list):
results = [i for i, j in val_list]
score = 100 * filters.geometric_mean(results)
score = 100 * filter.geometric_mean(results)
return score
@classmethod
@@ -921,7 +849,7 @@ class Output(object):
raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
results = results[9::10]
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
return score
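The slicing above is the non-obvious step: the suite reports 160 values and `results[9::10]` keeps every tenth one (16 values, presumably the per-group totals) before taking the geometric mean. A standalone sketch, assuming a correction factor of 3 (its actual value is not visible in this hunk):

    import math

    def geometric_mean(values):
        return math.exp(sum(math.log(v) for v in values) / len(values))

    def speedometer_score(results, correction_factor=3):  # factor assumed
        assert len(results) == 160, "Speedometer has 160 subtests"
        totals = results[9::10]  # keep only every tenth value
        return 60 * 1000 / geometric_mean(totals) / correction_factor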
@classmethod
@@ -930,7 +858,7 @@ class Output(object):
benchmark_score: ares6/jetstream self reported as 'geomean'
"""
results = [i for i, j in val_list if j == 'geomean']
return filters.mean(results)
return filter.mean(results)
@classmethod
def webaudio_score(cls, val_list):
@@ -938,7 +866,7 @@ class Output(object):
webaudio_score: self reported as 'Geometric Mean'
"""
results = [i for i, j in val_list if j == 'Geometric Mean']
return filters.mean(results)
return filter.mean(results)
@classmethod
def unity_webgl_score(cls, val_list):
@@ -946,7 +874,7 @@ class Output(object):
unity_webgl_score: self reported as 'Geometric Mean'
"""
results = [i for i, j in val_list if j == 'Geometric Mean']
return filters.mean(results)
return filter.mean(results)
@classmethod
def wasm_misc_score(cls, val_list):
@@ -954,7 +882,7 @@ class Output(object):
wasm_misc_score: self reported as '__total__'
"""
results = [i for i, j in val_list if j == '__total__']
return filters.mean(results)
return filter.mean(results)
@classmethod
def wasm_godot_score(cls, val_list):
@@ -962,7 +890,7 @@ class Output(object):
wasm_godot_score: first-interactive mean
"""
results = [i for i, j in val_list if j == 'first-interactive']
return filters.mean(results)
return filter.mean(results)
@classmethod
def stylebench_score(cls, val_list):
@@ -1008,7 +936,7 @@ class Output(object):
raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
results = results[75::76]
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
score = 60 * 1000 / filter.geometric_mean(results) / correctionFactor
return score
@classmethod
@@ -1019,13 +947,7 @@ class Output(object):
@classmethod
def assorted_dom_score(cls, val_list):
results = [i for i, j in val_list]
return round(filters.geometric_mean(results), 2)
@classmethod
def youtube_playback_performance_score(cls, val_list):
"""Calculate percentage of failed tests."""
results = [i for i, j in val_list]
return round(filters.mean(results), 2)
return round(filter.geometric_mean(results), 2)
@classmethod
def supporting_data_total(cls, val_list):
@@ -1055,11 +977,9 @@ class Output(object):
return self.wasm_misc_score(vals)
elif testname.startswith('raptor-wasm-godot'):
return self.wasm_godot_score(vals)
elif testname.startswith('raptor-youtube-playback'):
return self.youtube_playback_performance_score(vals)
elif testname.startswith('supporting_data'):
return self.supporting_data_total(vals)
elif len(vals) > 1:
return round(filters.geometric_mean([i for i, j in vals]), 2)
return round(filter.geometric_mean([i for i, j in vals]), 2)
else:
return round(filters.mean([i for i, j in vals]), 2)
return round(filter.mean([i for i, j in vals]), 2)

View file

@@ -55,7 +55,6 @@
[include:tests/raptor-stylebench.ini]
[include:tests/raptor-sunspider.ini]
[include:tests/raptor-unity-webgl.ini]
[include:tests/raptor-youtube-playback.ini]
[include:tests/raptor-wasm-godot.ini]
[include:tests/raptor-wasm-godot-baseline.ini]
[include:tests/raptor-wasm-godot-ion.ini]

View file

@@ -1,32 +0,0 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Youtube playback performance benchmark
#
# Original location of source and media files:
# https://ytlr-cert.appspot.com/2019/main.html?test_type=playbackperf-test
[DEFAULT]
type = benchmark
use_live_sites = true
gecko_profile_interval = 1
gecko_profile_entries = 14000000
gecko_profile_threads = MediaPlayback
test_url = http://yttest.dev.mozaws.net/2019/main.html?test_type=playbackperf-test&raptor=true&command=run&exclude=1,2
page_cycles = 1
# account for a page cycle duration of at most 45 minutes
page_timeout = 2700000
alert_threshold = 2.0
lower_is_better = true
unit = score
subtest_lower_is_better = true
subtest_unit = score
# TODO: Allow the host / port option in the manifest (Bug 1547932)
preferences = {"network.proxy.type": 0}
[raptor-youtube-playback-firefox]
apps = firefox
[raptor-youtube-playback-geckoview]
apps = geckoview

View file

@@ -51,7 +51,6 @@ VALID_MANIFESTS = [{
'type': 'pageload',
'unit': 'ms',
'alert_change_type': None,
'alert_on': None,
'playback': None,
}, {

View file

@@ -52,18 +52,15 @@
"run_at": "document_end"
},
{
"matches": [
"*://*/Speedometer/index.html*",
"*://*/StyleBench/*",
"*://*/MotionMark/*",
"*://*/SunSpider/*",
"*://*/webaudio/*",
"*://*/unity-webgl/index.html*",
"*://*/wasm-misc/index.html*",
"*://*/wasm-godot/index.html*",
"*://*/assorted-dom/assorted/results.html*",
"*://*.mozaws.net/*"
],
"matches": ["*://*/Speedometer/index.html*",
"*://*/StyleBench/*",
"*://*/MotionMark/*",
"*://*/SunSpider/*",
"*://*/webaudio/*",
"*://*/unity-webgl/index.html*",
"*://*/wasm-misc/index.html*",
"*://*/wasm-godot/index.html*",
"*://*/assorted-dom/assorted/results.html*"],
"js": ["benchmark.js"],
"run_at": "document_end"
}

View file

@@ -68,18 +68,15 @@ var geckoThreads = [];
var debugMode = 0;
var screenCapture = false;
var results = {
"name": "",
"page": "",
"type": "",
"browser_cycle": 0,
"expected_browser_cycles": 0,
"cold": false,
"lower_is_better": true,
"alert_change_type": "relative",
"alert_threshold": 2.0,
"measurements": {},
};
var results = {"name": "",
"page": "",
"type": "",
"browser_cycle": 0,
"expected_browser_cycles": 0,
"cold": false,
"lower_is_better": true,
"alert_threshold": 2.0,
"measurements": {}};
function getTestSettings() {
console.log("getting test settings from control server");
@@ -111,18 +108,17 @@ function getTestSettings() {
console.log(`testURL: ${testURL}`);
results.alert_change_type = settings.alert_change_type;
results.alert_threshold = settings.alert_threshold;
results.browser_cycle = browserCycle;
results.cold = settings.cold;
results.expected_browser_cycles = settings.expected_browser_cycles;
results.lower_is_better = settings.lower_is_better === true;
results.name = testName;
results.page = testURL;
results.type = testType;
results.name = testName;
results.browser_cycle = browserCycle;
results.expected_browser_cycles = settings.expected_browser_cycles;
results.cold = settings.cold;
results.unit = settings.unit;
results.subtest_unit = settings.subtest_unit;
results.lower_is_better = settings.lower_is_better === true;
results.subtest_lower_is_better = settings.subtest_lower_is_better === true;
results.alert_threshold = settings.alert_threshold;
if (settings.gecko_profile === true) {
results.extra_options = ["gecko_profile"];