Mirror of https://github.com/mozilla/gecko-dev.git
Backed out changeset 67e5d2425c75 (bug 1565316) for causing raptor wasm failures. CLOSED TREE
Parent: 21b23f7c9d
Commit: 2c2409c49e
@@ -195,19 +195,21 @@ def write_test_settings_json(args, test_details, oskey):
test_settings['raptor-options']['unit'] = test_details.get("unit", "ms")

test_settings['raptor-options']['lower_is_better'] = test_details.get("lower_is_better", True)
test_settings['raptor-options']['lower_is_better'] = bool_from_str(
    test_details.get("lower_is_better", "true"))

# support optional subtest unit/lower_is_better fields
val = test_details.get('subtest_unit', test_settings['raptor-options']['unit'])
test_settings['raptor-options']['subtest_unit'] = val
subtest_lower_is_better = test_details.get('subtest_lower_is_better')
subtest_lower_is_better = test_details.get('subtest_lower_is_better', None)

if subtest_lower_is_better is None:
    # default to main test values if not set
    test_settings['raptor-options']['subtest_lower_is_better'] = (
        test_settings['raptor-options']['lower_is_better'])
else:
    test_settings['raptor-options']['subtest_lower_is_better'] = subtest_lower_is_better
    test_settings['raptor-options']['subtest_lower_is_better'] = bool_from_str(
        subtest_lower_is_better)

if test_details.get("alert_change_type", None) is not None:
    test_settings['raptor-options']['alert_change_type'] = test_details['alert_change_type']
@@ -415,9 +417,6 @@ def get_raptor_test_list(args, oskey):
# remove the 'hero =' line since no longer measuring hero
del next_test['hero']

if next_test.get('lower_is_better') is not None:
    next_test['lower_is_better'] = bool_from_str(next_test.get('lower_is_better'))

# write out .json test setting files for the control server to read and send to web ext
if len(tests_to_run) != 0:
    for test in tests_to_run:
@@ -13,16 +13,13 @@ import filters
import json
import os

from abc import ABCMeta, abstractmethod
from logger.logger import RaptorLogger

LOG = RaptorLogger(component='perftest-output')
LOG = RaptorLogger(component='raptor-output')


class PerftestOutput(object):
    """Abstract base class to handle output of perftest results"""

    __metaclass__ = ABCMeta

class Output(object):
    """class for raptor output"""

    def __init__(self, results, supporting_data, subtest_alert_on):
        """
@@ -35,290 +32,6 @@ class PerftestOutput(object):
|
|||
self.summarized_screenshots = []
|
||||
self.subtest_alert_on = subtest_alert_on
|
||||
|
||||
@abstractmethod
|
||||
def summarize(self, test_names):
|
||||
raise NotImplementedError()
|
||||
|
||||
def summarize_supporting_data(self):
|
||||
'''
|
||||
Supporting data was gathered outside of the main raptor test; it will be kept
|
||||
separate from the main raptor test results. Summarize it appropriately.
|
||||
|
||||
supporting_data = {'type': 'data-type',
|
||||
'test': 'raptor-test-ran-when-data-was-gathered',
|
||||
'unit': 'unit that the values are in',
|
||||
'values': {
|
||||
'name': value,
|
||||
'nameN': valueN}}
|
||||
|
||||
More specifically, power data will look like this:
|
||||
|
||||
supporting_data = {'type': 'power',
|
||||
'test': 'raptor-speedometer-geckoview',
|
||||
'unit': 'mAh',
|
||||
'values': {
|
||||
'cpu': cpu,
|
||||
'wifi': wifi,
|
||||
'screen': screen,
|
||||
'proportional': proportional}}
|
||||
|
||||
We want to treat each value as a 'subtest'; and for the overall aggregated
|
||||
test result, we'll sum together all subtest values.
|
||||
'''
|
||||
if self.supporting_data is None:
|
||||
return
|
||||
|
||||
self.summarized_supporting_data = []
|
||||
|
||||
for data_set in self.supporting_data:
|
||||
suites = []
|
||||
test_results = {
|
||||
'framework': {
|
||||
'name': 'raptor',
|
||||
},
|
||||
'suites': suites,
|
||||
}
|
||||
|
||||
data_type = data_set['type']
|
||||
LOG.info("summarizing %s data" % data_type)
|
||||
|
||||
# suite name will be name of the actual raptor test that ran, plus the type of
|
||||
# supporting data i.e. 'raptor-speedometer-geckoview-power'
|
||||
vals = []
|
||||
subtests = []
|
||||
suite = {
|
||||
'name': data_set['test'] + "-" + data_set['type'],
|
||||
'type': data_set['type'],
|
||||
'subtests': subtests,
|
||||
'lowerIsBetter': True,
|
||||
'unit': data_set['unit'],
|
||||
'alertThreshold': 2.0
|
||||
}
|
||||
|
||||
suites.append(suite)
|
||||
|
||||
# each supporting data measurement becomes a subtest, with the measurement type
|
||||
# used for the subtest name. i.e. 'raptor-speedometer-geckoview-power-cpu'
|
||||
# the overall 'suite' value for supporting data will be the sum of all measurements
|
||||
for measurement_name, value in data_set['values'].iteritems():
|
||||
new_subtest = {}
|
||||
new_subtest['name'] = data_set['test'] + "-" + data_type + "-" + measurement_name
|
||||
new_subtest['value'] = value
|
||||
new_subtest['lowerIsBetter'] = True
|
||||
new_subtest['alertThreshold'] = 2.0
|
||||
new_subtest['unit'] = data_set['unit']
|
||||
subtests.append(new_subtest)
|
||||
vals.append([new_subtest['value'], new_subtest['name']])
|
||||
|
||||
if len(subtests) > 1:
|
||||
suite['value'] = self.construct_summary(vals, testname="supporting_data")
|
||||
|
||||
subtests.sort(key=lambda subtest: subtest['name'])
|
||||
suites.sort(key=lambda suite: suite['name'])
|
||||
|
||||
self.summarized_supporting_data.append(test_results)
|
||||
|
||||
return
|
||||
|
||||
def output(self, test_names):
|
||||
"""output to file and perfherder data json"""
|
||||
if os.getenv('MOZ_UPLOAD_DIR'):
|
||||
# i.e. testing/mozharness/build/raptor.json locally; in production it will
|
||||
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
|
||||
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
|
||||
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'raptor.json')
|
||||
screenshot_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'screenshots.html')
|
||||
else:
|
||||
results_path = os.path.join(os.getcwd(), 'raptor.json')
|
||||
screenshot_path = os.path.join(os.getcwd(), 'screenshots.html')
|
||||
|
||||
if self.summarized_results == {}:
|
||||
LOG.error("no summarized raptor results found for %s" %
|
||||
', '.join(test_names))
|
||||
else:
|
||||
with open(results_path, 'w') as f:
|
||||
for result in self.summarized_results:
|
||||
f.write("%s\n" % result)
|
||||
|
||||
if len(self.summarized_screenshots) > 0:
|
||||
with open(screenshot_path, 'w') as f:
|
||||
for result in self.summarized_screenshots:
|
||||
f.write("%s\n" % result)
|
||||
LOG.info("screen captures can be found locally at: %s" % screenshot_path)
|
||||
|
||||
# now that we've checked for screen captures too, if there were no actual
|
||||
# test results we can bail out here
|
||||
if self.summarized_results == {}:
|
||||
return False, 0
|
||||
|
||||
# when gecko_profiling, we don't want results ingested by Perfherder
|
||||
extra_opts = self.summarized_results['suites'][0].get('extraOptions', [])
|
||||
test_type = self.summarized_results['suites'][0].get('type', '')
|
||||
|
||||
output_perf_data = True
|
||||
not_posting = '- not posting regular test results for perfherder'
|
||||
if 'gecko_profile' in extra_opts:
|
||||
LOG.info("gecko profiling enabled %s" % not_posting)
|
||||
output_perf_data = False
|
||||
elif test_type == 'scenario':
|
||||
# if a resource-usage flag was supplied the perfherder data
|
||||
# will still be output from output_supporting_data
|
||||
LOG.info("scenario test type was run %s" % not_posting)
|
||||
output_perf_data = False
|
||||
|
||||
total_perfdata = 0
|
||||
if output_perf_data:
|
||||
# if we have supporting data i.e. power, we ONLY want those measurements
|
||||
# dumped out. TODO: Bug 1515406 - Add option to output both supplementary
|
||||
# data (i.e. power) and the regular Raptor test result
|
||||
# Both are already available as separate PERFHERDER_DATA json blobs
|
||||
if len(self.summarized_supporting_data) == 0:
|
||||
LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
|
||||
total_perfdata = 1
|
||||
else:
|
||||
LOG.info("supporting data measurements exist - only posting those to perfherder")
|
||||
|
||||
json.dump(self.summarized_results, open(results_path, 'w'), indent=2,
|
||||
sort_keys=True)
|
||||
LOG.info("results can also be found locally at: %s" % results_path)
|
||||
|
||||
return True, total_perfdata
|
||||
|
||||
def output_supporting_data(self, test_names):
|
||||
'''
|
||||
Supporting data was gathered outside of the main raptor test; it has already
|
||||
been summarized, now output it appropriately.
|
||||
|
||||
We want to output supporting data in a completely separate perfherder json blob and
|
||||
in a corresponding file artifact. This way, supporting data can be ingested as its own
|
||||
test suite in perfherder and alerted upon if desired; kept outside of the test results
|
||||
from the actual Raptor test which was run when the supporting data was gathered.
|
||||
'''
|
||||
if len(self.summarized_supporting_data) == 0:
|
||||
LOG.error("no summarized supporting data found for %s" %
|
||||
', '.join(test_names))
|
||||
return False, 0
|
||||
|
||||
total_perfdata = 0
|
||||
for next_data_set in self.summarized_supporting_data:
|
||||
data_type = next_data_set['suites'][0]['type']
|
||||
|
||||
if os.environ['MOZ_UPLOAD_DIR']:
|
||||
# i.e. testing/mozharness/build/raptor.json locally; in production it will
|
||||
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
|
||||
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
|
||||
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'raptor-%s.json' % data_type)
|
||||
else:
|
||||
results_path = os.path.join(os.getcwd(), 'raptor-%s.json' % data_type)
|
||||
|
||||
# dump data to raptor-data.json artifact
|
||||
json.dump(next_data_set, open(results_path, 'w'), indent=2, sort_keys=True)
|
||||
|
||||
# the output that treeherder expects to find
|
||||
LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
|
||||
LOG.info("%s results can also be found locally at: %s" % (data_type, results_path))
|
||||
total_perfdata += 1
|
||||
|
||||
return True, total_perfdata
|
||||
|
||||
def construct_summary(self, vals, testname):
|
||||
|
||||
def _filter(vals, value=None):
|
||||
if value is None:
|
||||
return [i for i, j in vals]
|
||||
return [i for i, j in vals if j == value]
|
||||
|
||||
if testname.startswith('raptor-v8_7'):
|
||||
return 100 * filters.geometric_mean(_filter(vals))
|
||||
|
||||
if testname.startswith('raptor-speedometer'):
|
||||
correctionFactor = 3
|
||||
results = _filter(vals)
|
||||
# speedometer has 16 tests, each of these are made of up 9 subtests
|
||||
# and a sum of the 9 values. We receive 160 values, and want to use
|
||||
# the 16 test values, not the sub test values.
|
||||
if len(results) != 160:
|
||||
raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
|
||||
|
||||
results = results[9::10]
|
||||
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
|
||||
return score
|
||||
|
||||
if testname.startswith('raptor-stylebench'):
|
||||
# see https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
|
||||
correctionFactor = 3
|
||||
results = _filter(vals)
|
||||
|
||||
# stylebench has 5 tests, each of these are made of up 5 subtests
|
||||
#
|
||||
# * Adding classes.
|
||||
# * Removing classes.
|
||||
# * Mutating attributes.
|
||||
# * Adding leaf elements.
|
||||
# * Removing leaf elements.
|
||||
#
|
||||
# which are made of two subtests each (sync/async) and repeated 5 times
|
||||
# each, thus, the list here looks like:
|
||||
#
|
||||
# [Test name/Adding classes - 0/ Sync; <x>]
|
||||
# [Test name/Adding classes - 0/ Async; <y>]
|
||||
# [Test name/Adding classes - 0; <x> + <y>]
|
||||
# [Test name/Removing classes - 0/ Sync; <x>]
|
||||
# [Test name/Removing classes - 0/ Async; <y>]
|
||||
# [Test name/Removing classes - 0; <x> + <y>]
|
||||
# ...
|
||||
# [Test name/Adding classes - 1 / Sync; <x>]
|
||||
# [Test name/Adding classes - 1 / Async; <y>]
|
||||
# [Test name/Adding classes - 1 ; <x> + <y>]
|
||||
# ...
|
||||
# [Test name/Removing leaf elements - 4; <x> + <y>]
|
||||
# [Test name; <sum>] <- This is what we want.
|
||||
#
|
||||
# So, 5 (subtests) *
|
||||
# 5 (repetitions) *
|
||||
# 3 (entries per repetition (sync/async/sum)) =
|
||||
# 75 entries for test before the sum.
|
||||
#
|
||||
# We receive 76 entries per test, which ads up to 380. We want to use
|
||||
# the 5 test entries, not the rest.
|
||||
if len(results) != 380:
|
||||
raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
|
||||
results = results[75::76]
|
||||
return 60 * 1000 / filters.geometric_mean(results) / correctionFactor
|
||||
|
||||
if testname.startswith(('raptor-kraken', 'raptor-sunspider', 'supporting_data')):
|
||||
return sum(_filter(vals))
|
||||
|
||||
if testname.startswith(('raptor-unity-webgl', 'raptor-webaudio')):
|
||||
# webaudio_score and unity_webgl_score: self reported as 'Geometric Mean'
|
||||
return filters.mean(_filter(vals, 'Geometric Mean'))
|
||||
|
||||
if testname.startswith('raptor-assorted-dom'):
|
||||
return round(filters.geometric_mean(_filter(vals)), 2)
|
||||
|
||||
if testname.startswith('raptor-wasm-misc'):
|
||||
# wasm_misc_score: self reported as '__total__'
|
||||
return filters.mean(_filter(results, '__total__'))
|
||||
|
||||
if testname.startswith('raptor-wasm-godot'):
|
||||
# wasm_godot_score: first-interactive mean
|
||||
return filters.mean(_filter(vals, 'first-interactive'))
|
||||
|
||||
if testname.startswith('raptor-youtube-playback'):
|
||||
return round(filters.mean(_filter(vals)), 2)
|
||||
|
||||
if len(vals) > 1:
|
||||
return round(filters.geometric_mean(_filter(vals)), 2)
|
||||
|
||||
return round(filters.mean(_filter(vals)), 2)
|
||||
|
||||
|
||||
class RaptorOutput(PerftestOutput):
|
||||
"""class for raptor output"""
|
||||
|
||||
def summarize(self, test_names):
|
||||
suites = []
|
||||
test_results = {
|
||||
|
@@ -460,10 +173,6 @@ class RaptorOutput(PerftestOutput):
if len(subtests) > 1:
    suite['value'] = self.construct_summary(vals, testname=test.name)

subtests.sort(key=lambda subtest: subtest['name'])

suites.sort(key=lambda suite: suite['name'])

self.summarized_results = test_results

def combine_browser_cycles(self):
@@ -585,6 +294,95 @@ class RaptorOutput(PerftestOutput):
|
|||
self.summarized_results['suites'] = [item for item in self.summarized_results['suites']
|
||||
if item.get('to_be_deleted') is not True]
|
||||
|
||||
def summarize_supporting_data(self):
|
||||
'''
|
||||
Supporting data was gathered outside of the main raptor test; it will be kept
|
||||
separate from the main raptor test results. Summarize it appropriately.
|
||||
|
||||
supporting_data = {'type': 'data-type',
|
||||
'test': 'raptor-test-ran-when-data-was-gathered',
|
||||
'unit': 'unit that the values are in',
|
||||
'values': {
|
||||
'name': value,
|
||||
'nameN': valueN}}
|
||||
|
||||
More specifically, power data will look like this:
|
||||
|
||||
supporting_data = {'type': 'power',
|
||||
'test': 'raptor-speedometer-geckoview',
|
||||
'unit': 'mAh',
|
||||
'values': {
|
||||
'cpu': cpu,
|
||||
'wifi': wifi,
|
||||
'screen': screen,
|
||||
'proportional': proportional}}
|
||||
|
||||
We want to treat each value as a 'subtest'; and for the overall aggregated
|
||||
test result we will add all of these subtest values togther.
|
||||
'''
|
||||
if self.supporting_data is None:
|
||||
return
|
||||
|
||||
self.summarized_supporting_data = []
|
||||
support_data_by_type = {}
|
||||
|
||||
for data_set in self.supporting_data:
|
||||
|
||||
data_type = data_set['type']
|
||||
LOG.info("summarizing %s data" % data_type)
|
||||
|
||||
if data_type not in support_data_by_type:
|
||||
support_data_by_type[data_type] = {
|
||||
'framework': {
|
||||
'name': 'raptor',
|
||||
},
|
||||
'suites': [],
|
||||
}
|
||||
|
||||
# suite name will be name of the actual raptor test that ran, plus the type of
|
||||
# supporting data i.e. 'raptor-speedometer-geckoview-power'
|
||||
vals = []
|
||||
subtests = []
|
||||
suite = {
|
||||
'name': data_set['test'] + "-" + data_set['type'],
|
||||
'type': data_set['type'],
|
||||
'subtests': subtests,
|
||||
'lowerIsBetter': True,
|
||||
'unit': data_set['unit'],
|
||||
'alertThreshold': 2.0
|
||||
}
|
||||
|
||||
support_data_by_type[data_type]['suites'].append(suite)
|
||||
|
||||
# each supporting data measurement becomes a subtest, with the measurement type
|
||||
# used for the subtest name. i.e. 'power-cpu'
|
||||
# the overall 'suite' value for supporting data is dependent on
|
||||
# the unit of the values, by default the sum of all measurements
|
||||
# is taken.
|
||||
for measurement_name, value in data_set['values'].iteritems():
|
||||
new_subtest = {}
|
||||
new_subtest['name'] = data_type + "-" + measurement_name
|
||||
new_subtest['value'] = value
|
||||
new_subtest['lowerIsBetter'] = True
|
||||
new_subtest['alertThreshold'] = 2.0
|
||||
new_subtest['unit'] = data_set['unit']
|
||||
subtests.append(new_subtest)
|
||||
vals.append([new_subtest['value'], new_subtest['name']])
|
||||
|
||||
if len(subtests) >= 1:
|
||||
suite['value'] = self.construct_summary(
|
||||
vals,
|
||||
testname="supporting_data",
|
||||
unit=data_set['unit']
|
||||
)
|
||||
|
||||
# split the supporting data by type, there will be one
|
||||
# perfherder output per type
|
||||
for data_type in support_data_by_type:
|
||||
self.summarized_supporting_data.append(support_data_by_type[data_type])
|
||||
|
||||
return
|
||||
|
||||
def parseSpeedometerOutput(self, test):
|
||||
# each benchmark 'index' becomes a subtest; each pagecycle / iteration
|
||||
# of the test has multiple values per index/subtest
|
||||
|
@@ -1089,8 +887,8 @@ class RaptorOutput(PerftestOutput):
for pagecycle in data:
    for _sub, _value in pagecycle[0].iteritems():
        try:
            percent_dropped = (float(_value['droppedFrames']) /
                               _value['decodedFrames'] * 100.0)
            percent_dropped = float(_value['droppedFrames']) / _value['decodedFrames'] \
                * 100.0
        except ZeroDivisionError:
            # if no frames have been decoded the playback failed completely
            percent_dropped = 100.0
@@ -1160,126 +958,281 @@ class RaptorOutput(PerftestOutput):
|
|||
|
||||
self.summarized_screenshots.append("""</table></body> </html>""")
|
||||
|
||||
def output(self, test_names):
|
||||
"""output to file and perfherder data json """
|
||||
if os.getenv('MOZ_UPLOAD_DIR'):
|
||||
# i.e. testing/mozharness/build/raptor.json locally; in production it will
|
||||
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
|
||||
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
|
||||
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'raptor.json')
|
||||
screenshot_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'screenshots.html')
|
||||
else:
|
||||
results_path = os.path.join(os.getcwd(), 'raptor.json')
|
||||
screenshot_path = os.path.join(os.getcwd(), 'screenshots.html')
|
||||
|
||||
class BrowsertimeOutput(PerftestOutput):
|
||||
"""class for browsertime output"""
|
||||
|
||||
def summarize(self, test_names):
|
||||
"""
|
||||
Summarize the parsed browsertime test output, and format accordingly so the output can
|
||||
be ingested by Perfherder.
|
||||
|
||||
At this point each entry in self.results for browsertime-pageload tests is in this format:
|
||||
|
||||
{'statistics':{'fcp': {u'p99': 932, u'mdev': 10.0941, u'min': 712, u'p90': 810, u'max':
|
||||
932, u'median': 758, u'p10': 728, u'stddev': 50, u'mean': 769}, 'dcf': {u'p99': 864,
|
||||
u'mdev': 11.6768, u'min': 614, u'p90': 738, u'max': 864, u'median': 670, u'p10': 632,
|
||||
u'stddev': 58, u'mean': 684}, 'fnbpaint': {u'p99': 830, u'mdev': 9.6851, u'min': 616,
|
||||
u'p90': 719, u'max': 830, u'median': 668, u'p10': 642, u'stddev': 48, u'mean': 680},
|
||||
'loadtime': {u'p99': 5818, u'mdev': 111.7028, u'min': 3220, u'p90': 4450, u'max': 5818,
|
||||
u'median': 3476, u'p10': 3241, u'stddev': 559, u'mean': 3642}}, 'name':
|
||||
'raptor-tp6-guardian-firefox', 'url': 'https://www.theguardian.co.uk', 'lower_is_better':
|
||||
True, 'measurements': {'fcp': [932, 744, 744, 810, 712, 775, 759, 744, 777, 739, 809, 906,
|
||||
734, 742, 760, 758, 728, 792, 757, 759, 742, 759, 775, 726, 730], 'dcf': [864, 679, 637,
|
||||
662, 652, 651, 710, 679, 646, 689, 686, 845, 670, 694, 632, 703, 670, 738, 633, 703, 614,
|
||||
703, 650, 622, 670], 'fnbpaint': [830, 648, 666, 704, 616, 683, 678, 650, 685, 651, 719,
|
||||
820, 634, 664, 681, 664, 642, 703, 668, 670, 669, 668, 681, 652, 642], 'loadtime': [4450,
|
||||
3592, 3770, 3345, 3453, 3220, 3434, 3621, 3511, 3416, 3430, 5818, 4729, 3406, 3506, 3588,
|
||||
3245, 3381, 3707, 3241, 3595, 3483, 3236, 3390, 3476]}, 'subtest_unit': 'ms', 'bt_ver':
|
||||
'4.9.2-android', 'alert_threshold': 2, 'cold': True, 'type': 'browsertime-pageload',
|
||||
'unit': 'ms', 'browser': "{u'userAgent': u'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13;
|
||||
rv:70.0) Gecko/20100101 Firefox/70.0', u'windowSize': u'1366x694'}"}
|
||||
|
||||
Now we must process this further and prepare the result for output suitable for perfherder
|
||||
ingestion.
|
||||
|
||||
Note: For the overall subtest values/results (i.e. for each measurement type) we will use
|
||||
the Browsertime-provided statistics, instead of calcuating our own geomeans from the
|
||||
replicates.
|
||||
"""
|
||||
LOG.info("preparing browsertime results for output")
|
||||
|
||||
suites = []
|
||||
test_results = {
|
||||
'framework': {
|
||||
'name': 'browsertime',
|
||||
},
|
||||
'suites': suites,
|
||||
}
|
||||
|
||||
# check if we actually have any results
|
||||
if len(self.results) == 0:
|
||||
LOG.error("no browsertime test results found for %s" %
|
||||
if self.summarized_results == {}:
|
||||
LOG.error("no summarized raptor results found for %s" %
|
||||
', '.join(test_names))
|
||||
return
|
||||
else:
|
||||
with open(results_path, 'w') as f:
|
||||
for result in self.summarized_results:
|
||||
f.write("%s\n" % result)
|
||||
|
||||
for test in self.results:
|
||||
vals = []
|
||||
subtests = []
|
||||
suite = {
|
||||
'name': test['name'],
|
||||
'type': test['type'],
|
||||
'extraOptions': test['extra_options'],
|
||||
'subtests': subtests,
|
||||
'lowerIsBetter': test['lower_is_better'],
|
||||
'unit': test['unit'],
|
||||
'alertThreshold': float(test['alert_threshold'])
|
||||
}
|
||||
if len(self.summarized_screenshots) > 0:
|
||||
with open(screenshot_path, 'w') as f:
|
||||
for result in self.summarized_screenshots:
|
||||
f.write("%s\n" % result)
|
||||
LOG.info("screen captures can be found locally at: %s" % screenshot_path)
|
||||
|
||||
# Check if the test has set optional properties
|
||||
if hasattr(test, "alert_change_type"):
|
||||
suite['alertChangeType'] = test['alert_change_type']
|
||||
# now that we've checked for screen captures too, if there were no actual
|
||||
# test results we can bail out here
|
||||
if self.summarized_results == {}:
|
||||
return False, 0
|
||||
|
||||
# process results for pageloader type of tests
|
||||
if test["type"] != "browsertime-pageload":
|
||||
LOG.error("output.summarize received unsupported test results type for %s" %
|
||||
test['name'])
|
||||
continue
|
||||
# when gecko_profiling, we don't want results ingested by Perfherder
|
||||
extra_opts = self.summarized_results['suites'][0].get('extraOptions', [])
|
||||
test_type = self.summarized_results['suites'][0].get('type', '')
|
||||
|
||||
suites.append(suite)
|
||||
output_perf_data = True
|
||||
not_posting = '- not posting regular test results for perfherder'
|
||||
if 'gecko_profile' in extra_opts:
|
||||
LOG.info("gecko profiling enabled %s" % not_posting)
|
||||
output_perf_data = False
|
||||
elif test_type == 'scenario':
|
||||
# if a resource-usage flag was supplied the perfherder data
|
||||
# will still be output from output_supporting_data
|
||||
LOG.info("scenario test type was run %s" % not_posting)
|
||||
output_perf_data = False
|
||||
|
||||
for measurement_name, replicates in test['measurements'].iteritems():
|
||||
new_subtest = {}
|
||||
new_subtest['name'] = measurement_name
|
||||
new_subtest['replicates'] = replicates
|
||||
new_subtest['lowerIsBetter'] = test['subtest_lower_is_better']
|
||||
new_subtest['alertThreshold'] = float(test['alert_threshold'])
|
||||
new_subtest['value'] = 0
|
||||
new_subtest['unit'] = test['subtest_unit']
|
||||
total_perfdata = 0
|
||||
if output_perf_data:
|
||||
# if we have supporting data i.e. power, we ONLY want those measurements
|
||||
# dumped out. TODO: Bug 1515406 - Add option to output both supplementary
|
||||
# data (i.e. power) and the regular Raptor test result
|
||||
# Both are already available as separate PERFHERDER_DATA json blobs
|
||||
if len(self.summarized_supporting_data) == 0:
|
||||
LOG.info("PERFHERDER_DATA: %s" % json.dumps(self.summarized_results))
|
||||
total_perfdata = 1
|
||||
else:
|
||||
LOG.info("supporting data measurements exist - only posting those to perfherder")
|
||||
|
||||
# if 'alert_on' is set for this particular measurement, then we want to set the
|
||||
# flag in the perfherder output to turn on alerting for this subtest
|
||||
if self.subtest_alert_on is not None:
|
||||
if measurement_name in self.subtest_alert_on:
|
||||
LOG.info("turning on subtest alerting for measurement type: %s"
|
||||
% measurement_name)
|
||||
new_subtest['shouldAlert'] = True
|
||||
json.dump(self.summarized_results, open(results_path, 'w'), indent=2,
|
||||
sort_keys=True)
|
||||
LOG.info("results can also be found locally at: %s" % results_path)
|
||||
|
||||
# for the subtest (page-load measurement type) overall score/result/value, we
|
||||
# want to use the median of the replicates - now instead of calculating this
|
||||
# ourselves, we will take this value from the browsertime results themselves
|
||||
# as browsertime calculates the mean (and other values) automatically for us
|
||||
bt_measurement_median = test['statistics'][measurement_name]['median']
|
||||
new_subtest['value'] = bt_measurement_median
|
||||
return True, total_perfdata
|
||||
|
||||
# we have a vals list that contains all the top level results for each of the
|
||||
# measurement types; this will be used to calculate an overall test result
|
||||
# which will be the geomean of all of the top level results of each type
|
||||
vals.append([new_subtest['value'], new_subtest['name']])
|
||||
subtests.append(new_subtest)
|
||||
def output_supporting_data(self, test_names):
|
||||
'''
|
||||
Supporting data was gathered outside of the main raptor test; it has already
|
||||
been summarized, now output it appropriately.
|
||||
|
||||
# for pageload tests, if there are > 1 subtests here, that means there
|
||||
# were multiple measurement types captured in each single pageload; we want
|
||||
# to get the mean of those values and report 1 overall 'suite' value
|
||||
# for the page; all replicates will still be available in the JSON artifact
|
||||
We want to output supporting data in a completely separate perfherder json blob and
|
||||
in a corresponding file artifact. This way supporting data can be ingested as it's own
|
||||
test suite in perfherder and alerted upon if desired. Kept outside of the test results
|
||||
from the actual Raptor test that was ran when the supporting data was gathered.
|
||||
'''
|
||||
if len(self.summarized_supporting_data) == 0:
|
||||
LOG.error("no summarized supporting data found for %s" %
|
||||
', '.join(test_names))
|
||||
return False, 0
|
||||
|
||||
# summarize results to get top overall suite result
|
||||
if len(subtests) > 1:
|
||||
suite['value'] = self.construct_summary(vals,
|
||||
testname=test['name'])
|
||||
total_perfdata = 0
|
||||
for next_data_set in self.summarized_supporting_data:
|
||||
data_type = next_data_set['suites'][0]['type']
|
||||
|
||||
subtests.sort(key=lambda subtest: subtest['name'])
|
||||
if os.environ['MOZ_UPLOAD_DIR']:
|
||||
# i.e. testing/mozharness/build/raptor.json locally; in production it will
|
||||
# be at /tasks/task_*/build/ (where it will be picked up by mozharness later
|
||||
# and made into a tc artifact accessible in treeherder as perfherder-data.json)
|
||||
results_path = os.path.join(os.path.dirname(os.environ['MOZ_UPLOAD_DIR']),
|
||||
'raptor-%s.json' % data_type)
|
||||
else:
|
||||
results_path = os.path.join(os.getcwd(), 'raptor-%s.json' % data_type)
|
||||
|
||||
suites.sort(key=lambda suite: suite['name'])
|
||||
# dump data to raptor-data.json artifact
|
||||
json.dump(next_data_set, open(results_path, 'w'), indent=2, sort_keys=True)
|
||||
|
||||
self.summarized_results = test_results
|
||||
# the output that treeherder expects to find
|
||||
LOG.info("PERFHERDER_DATA: %s" % json.dumps(next_data_set))
|
||||
LOG.info("%s results can also be found locally at: %s" % (data_type, results_path))
|
||||
total_perfdata += 1
|
||||
|
||||
return True, total_perfdata
|
||||
|
||||
@classmethod
|
||||
def v8_Metric(cls, val_list):
|
||||
results = [i for i, j in val_list]
|
||||
score = 100 * filters.geometric_mean(results)
|
||||
return score
|
||||
|
||||
@classmethod
|
||||
def JS_Metric(cls, val_list):
|
||||
"""v8 benchmark score"""
|
||||
results = [i for i, j in val_list]
|
||||
return sum(results)
|
||||
|
||||
@classmethod
|
||||
def speedometer_score(cls, val_list):
|
||||
"""
|
||||
speedometer_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
|
||||
"""
|
||||
correctionFactor = 3
|
||||
results = [i for i, j in val_list]
|
||||
# speedometer has 16 tests, each of these are made of up 9 subtests
|
||||
# and a sum of the 9 values. We receive 160 values, and want to use
|
||||
# the 16 test values, not the sub test values.
|
||||
if len(results) != 160:
|
||||
raise Exception("Speedometer has 160 subtests, found: %s instead" % len(results))
|
||||
|
||||
results = results[9::10]
|
||||
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
|
||||
return score
|
||||
|
||||
@classmethod
|
||||
def benchmark_score(cls, val_list):
|
||||
"""
|
||||
benchmark_score: ares6/jetstream self reported as 'geomean'
|
||||
"""
|
||||
results = [i for i, j in val_list if j == 'geomean']
|
||||
return filters.mean(results)
|
||||
|
||||
@classmethod
|
||||
def webaudio_score(cls, val_list):
|
||||
"""
|
||||
webaudio_score: self reported as 'Geometric Mean'
|
||||
"""
|
||||
results = [i for i, j in val_list if j == 'Geometric Mean']
|
||||
return filters.mean(results)
|
||||
|
||||
@classmethod
|
||||
def unity_webgl_score(cls, val_list):
|
||||
"""
|
||||
unity_webgl_score: self reported as 'Geometric Mean'
|
||||
"""
|
||||
results = [i for i, j in val_list if j == 'Geometric Mean']
|
||||
return filters.mean(results)
|
||||
|
||||
@classmethod
|
||||
def wasm_misc_score(cls, val_list):
|
||||
"""
|
||||
wasm_misc_score: self reported as '__total__'
|
||||
"""
|
||||
results = [i for i, j in val_list if j == '__total__']
|
||||
return filters.mean(results)
|
||||
|
||||
@classmethod
|
||||
def wasm_godot_score(cls, val_list):
|
||||
"""
|
||||
wasm_godot_score: first-interactive mean
|
||||
"""
|
||||
results = [i for i, j in val_list if j == 'first-interactive']
|
||||
return filters.mean(results)
|
||||
|
||||
@classmethod
|
||||
def stylebench_score(cls, val_list):
|
||||
"""
|
||||
stylebench_score: https://bug-172968-attachments.webkit.org/attachment.cgi?id=319888
|
||||
"""
|
||||
correctionFactor = 3
|
||||
results = [i for i, j in val_list]
|
||||
|
||||
# stylebench has 5 tests, each of these are made of up 5 subtests
|
||||
#
|
||||
# * Adding classes.
|
||||
# * Removing classes.
|
||||
# * Mutating attributes.
|
||||
# * Adding leaf elements.
|
||||
# * Removing leaf elements.
|
||||
#
|
||||
# which are made of two subtests each (sync/async) and repeated 5 times
|
||||
# each, thus, the list here looks like:
|
||||
#
|
||||
# [Test name/Adding classes - 0/ Sync; <x>]
|
||||
# [Test name/Adding classes - 0/ Async; <y>]
|
||||
# [Test name/Adding classes - 0; <x> + <y>]
|
||||
# [Test name/Removing classes - 0/ Sync; <x>]
|
||||
# [Test name/Removing classes - 0/ Async; <y>]
|
||||
# [Test name/Removing classes - 0; <x> + <y>]
|
||||
# ...
|
||||
# [Test name/Adding classes - 1 / Sync; <x>]
|
||||
# [Test name/Adding classes - 1 / Async; <y>]
|
||||
# [Test name/Adding classes - 1 ; <x> + <y>]
|
||||
# ...
|
||||
# [Test name/Removing leaf elements - 4; <x> + <y>]
|
||||
# [Test name; <sum>] <- This is what we want.
|
||||
#
|
||||
# So, 5 (subtests) *
|
||||
# 5 (repetitions) *
|
||||
# 3 (entries per repetition (sync/async/sum)) =
|
||||
# 75 entries for test before the sum.
|
||||
#
|
||||
# We receive 76 entries per test, which ads up to 380. We want to use
|
||||
# the 5 test entries, not the rest.
|
||||
if len(results) != 380:
|
||||
raise Exception("StyleBench has 380 entries, found: %s instead" % len(results))
|
||||
|
||||
results = results[75::76]
|
||||
score = 60 * 1000 / filters.geometric_mean(results) / correctionFactor
|
||||
return score
|
||||
|
||||
@classmethod
|
||||
def sunspider_score(cls, val_list):
|
||||
results = [i for i, j in val_list]
|
||||
return sum(results)
|
||||
|
||||
@classmethod
|
||||
def assorted_dom_score(cls, val_list):
|
||||
results = [i for i, j in val_list]
|
||||
return round(filters.geometric_mean(results), 2)
|
||||
|
||||
@classmethod
|
||||
def youtube_playback_performance_score(cls, val_list):
|
||||
"""Calculate percentage of failed tests."""
|
||||
results = [i for i, j in val_list]
|
||||
return round(filters.mean(results), 2)
|
||||
|
||||
@classmethod
|
||||
def supporting_data_total(cls, val_list):
|
||||
results = [i for i, j in val_list]
|
||||
return sum(results)
|
||||
|
||||
@classmethod
|
||||
def supporting_data_average(cls, val_list):
|
||||
results = [i for i, j in val_list]
|
||||
return sum(results)/len(results)
|
||||
|
||||
def construct_summary(self, vals, testname, unit=None):
|
||||
if testname.startswith('raptor-v8_7'):
|
||||
return self.v8_Metric(vals)
|
||||
elif testname.startswith('raptor-kraken'):
|
||||
return self.JS_Metric(vals)
|
||||
elif testname.startswith('raptor-speedometer'):
|
||||
return self.speedometer_score(vals)
|
||||
elif testname.startswith('raptor-stylebench'):
|
||||
return self.stylebench_score(vals)
|
||||
elif testname.startswith('raptor-sunspider'):
|
||||
return self.sunspider_score(vals)
|
||||
elif testname.startswith('raptor-unity-webgl'):
|
||||
return self.unity_webgl_score(vals)
|
||||
elif testname.startswith('raptor-webaudio'):
|
||||
return self.webaudio_score(vals)
|
||||
elif testname.startswith('raptor-assorted-dom'):
|
||||
return self.assorted_dom_score(vals)
|
||||
elif testname.startswith('raptor-wasm-misc'):
|
||||
return self.wasm_misc_score(vals)
|
||||
elif testname.startswith('raptor-wasm-godot'):
|
||||
return self.wasm_godot_score(vals)
|
||||
elif testname.startswith('raptor-youtube-playback'):
|
||||
return self.youtube_playback_performance_score(vals)
|
||||
elif testname.startswith('supporting_data'):
|
||||
if unit and unit in ('%',):
|
||||
return self.supporting_data_average(vals)
|
||||
else:
|
||||
return self.supporting_data_total(vals)
|
||||
elif len(vals) > 1:
|
||||
return round(filters.geometric_mean([i for i, j in vals]), 2)
|
||||
else:
|
||||
return round(filters.mean([i for i, j in vals]), 2)
|
||||
|
|
|
@@ -60,7 +60,7 @@ from manifest import get_raptor_test_list
from memory import generate_android_memory_profile
from performance_tuning import tune_performance
from power import init_android_power_test, finish_android_power_test
from results import RaptorResultsHandler, BrowsertimeResultsHandler
from results import RaptorResultsHandler
from utils import view_gecko_profile, write_yml_file
from cpu import start_android_cpu_profiler
@@ -92,9 +92,7 @@ either Raptor or browsertime."""
gecko_profile=False, gecko_profile_interval=None, gecko_profile_entries=None,
symbols_path=None, host=None, power_test=False, cpu_test=False, memory_test=False,
is_release_build=False, debug_mode=False, post_startup_delay=None,
interrupt_handler=None, e10s=True, enable_webrender=False,
results_handler_class=RaptorResultsHandler,
**kwargs):
interrupt_handler=None, e10s=True, enable_webrender=False, **kwargs):

# Override the magic --host HOST_IP with the value of the environment variable.
if host == 'HOST_IP':
@@ -134,7 +132,6 @@ either Raptor or browsertime."""
self.profile_class = profile_class or app
self.firefox_android_apps = FIREFOX_ANDROID_APPS
self.interrupt_handler = interrupt_handler
self.results_handler = results_handler_class(self.config)

# debug mode is currently only supported when running locally
self.debug_mode = debug_mode if self.config['run_local'] else False
@@ -147,6 +144,9 @@ either Raptor or browsertime."""
LOG.info("main raptor init, config is: %s" % str(self.config))

# setup the control server
self.results_handler = RaptorResultsHandler(self.config)

self.build_browser_profile()

def build_browser_profile(self):
@@ -189,12 +189,6 @@ either Raptor or browsertime."""
def run_test_setup(self, test):
    LOG.info("starting test: %s" % test['name'])

    # if 'alert_on' was provided in the test INI, add to our config for results/output
    self.config['subtest_alert_on'] = test.get('alert_on')

    if test.get("preferences") is not None:
        self.set_browser_test_prefs(test['preferences'])

def run_tests(self, tests, test_names):
    try:
        for test in tests:
@@ -205,7 +199,7 @@ either Raptor or browsertime."""
LOG.error(e)
finally:
    self.run_test_teardown(test)
return self.process_results(tests, test_names)
return self.process_results(test_names)
finally:
    self.clean_up()
@@ -224,7 +218,7 @@ either Raptor or browsertime."""
LOG.info("cleaning up after gecko profiling")
self.gecko_profiler.clean()

def process_results(self, tests, test_names):
def process_results(self, test_names):
    # when running locally output results in build/raptor.json; when running
    # in production output to a local.json to be turned into tc job artifact
    raptor_json_path = os.path.join(self.artifact_dir, 'raptor.json')
@@ -232,11 +226,7 @@ either Raptor or browsertime."""
raptor_json_path = os.path.join(os.getcwd(), 'local.json')

self.config['raptor_json_path'] = raptor_json_path
return self.results_handler.summarize_and_output(self.config, tests, test_names)

@abstractmethod
def set_browser_test_prefs(self):
    pass
return self.results_handler.summarize_and_output(self.config, test_names)

@abstractmethod
def check_for_crashes(self):
@@ -336,12 +326,8 @@ class Browsertime(Perftest):
value = kwargs.pop(key)
setattr(self, key, value)

def klass(config):
    root_results_dir = os.path.join(os.environ.get('MOZ_UPLOAD_DIR', os.getcwd()),
                                    'browsertime-results')
    return BrowsertimeResultsHandler(config, root_results_dir=root_results_dir)
super(Browsertime, self).__init__(*args, **kwargs)

super(Browsertime, self).__init__(*args, results_handler_class=klass, **kwargs)
LOG.info("cwd: '{}'".format(os.getcwd()))

# For debugging.
@@ -356,11 +342,6 @@ class Browsertime(Perftest):
except Exception as e:
    LOG.info("{}: {}".format(k, e))

def set_browser_test_prefs(self, raw_prefs):
    # add test specific preferences
    LOG.info("setting test-specific Firefox preferences")
    self.profile.set_preferences(json.loads(raw_prefs))

def run_test_setup(self, test):
    super(Browsertime, self).run_test_setup(test)
@@ -374,6 +355,12 @@ class Browsertime(Perftest):
if self.browsertime_chromedriver:
    self.driver_paths.extend(['--chrome.chromedriverPath', self.browsertime_chromedriver])

self.resultdir = [
    '--resultDir',
    os.path.join(os.environ.get('MOZ_UPLOAD_DIR', os.getcwd()),
                 'browsertime-results', test['name']),
]

LOG.info('test: {}'.format(test))

def run_test_teardown(self, test):
@@ -389,26 +376,20 @@ class Browsertime(Perftest):
def clean_up(self):
    super(Browsertime, self).clean_up()

@property
def browsertime_args(self):
    binary_path = self.config['binary']
    LOG.info('binary_path: {}'.format(binary_path))

    return ['--browser', 'firefox', '--firefox.binaryPath', binary_path]

def run_test(self, test, timeout):

    self.run_test_setup(test)

    cmd = ([self.browsertime_node, self.browsertime_browsertimejs] +
           self.driver_paths +
           self.browsertime_args +
           ['--skipHar',
            '--video', 'true',
            '--visualMetrics', 'false',
            '-vv',
            '--resultDir', self.results_handler.result_dir_for_test(test),
            '-n', str(test.get('browser_cycles', 1)), test['test_url']])
    cmd = [self.browsertime_node, self.browsertime_browsertimejs, '--browser', 'firefox'] + \
        self.driver_paths + \
        ['--firefox.binaryPath', self.config['binary'],
         '--skipHar',
         '--video', 'true',
         '--visualMetrics', 'false',
         '-vv'] + \
        self.resultdir + \
        ['-n', str(test.get('browser_cycles', 1)),
         test['test_url']]

    # timeout is a single page-load timeout value in ms from the test INI
    # convert timeout to seconds and account for browser cycles
@@ -425,7 +406,6 @@ class Browsertime(Perftest):
LOG.info('timeout (s): {}'.format(timeout))
LOG.info('browsertime cwd: {}'.format(os.getcwd()))
LOG.info('browsertime cmd: {}'.format(cmd))
LOG.info('browsertime_ffmpeg: {}'.format(self.browsertime_ffmpeg))

# browsertime requires ffmpeg on the PATH for `--video=true`.
# It's easier to configure the PATH here than at the TC level.
@@ -439,8 +419,6 @@ class Browsertime(Perftest):
new_path = new_path.encode('utf-8', 'strict')
env['PATH'] = new_path

LOG.info('PATH: {}'.format(env['PATH']))

try:
    proc = mozprocess.ProcessHandler(cmd, env=env)
    proc.run(timeout=timeout,
@@ -448,38 +426,11 @@ class Browsertime(Perftest):
    proc.wait()

except Exception as e:
    LOG.critical("Error while attempting to run browsertime: %s" % str(e))
    raise
    raise Exception("Error while attempting to run browsertime: %s" % str(e))


class BrowsertimeAndroid(Browsertime):

    def __init__(self, app, binary, activity=None, intent=None, **kwargs):
        super(BrowsertimeAndroid, self).__init__(app, binary, profile_class="firefox", **kwargs)

        self.config.update({
            'activity': activity,
            'intent': intent,
        })

    @property
    def browsertime_args(self):
        return ['--browser', 'firefox', '--android',
                # Work around a `selenium-webdriver` issue where Browsertime
                # fails to find a Firefox binary even though we're going to
                # actually do things on an Android device.
                '--firefox.binaryPath', self.browsertime_node,
                '--firefox.android.package', self.config['binary'],
                '--firefox.android.activity', self.config['activity']]

    def build_browser_profile(self):
        super(BrowsertimeAndroid, self).build_browser_profile()

        # Merge in the Android profile.
        path = os.path.join(self.profile_data_dir, 'raptor-android')
        LOG.info("Merging profile: {}".format(path))
        self.profile.merge(path)
        self.profile.set_preferences({'browser.tabs.remote.autostart': self.config['e10s']})

    def process_results(self, test_names):
        # TODO - Bug 1565316 - Process browsertime results and dump out for perfherder
        LOG.info("TODO: Bug 1565316 - Process browsertime results and dump out for perfherder")


class Raptor(Perftest):
@@ -492,14 +443,6 @@ class Raptor(Perftest):
super(Raptor, self).__init__(*args, **kwargs)

# set up the results handler
self.results_handler = RaptorResultsHandler(
    gecko_profile=self.config.get('gecko_profile'),
    power_test=self.config.get('power_test'),
    cpu_test=self.config.get('cpu_test'),
    memory_test=self.config.get('memory_test'),
)

self.start_control_server()

def run_test_setup(self, test):
@@ -525,6 +468,12 @@ class Raptor(Perftest):
self.install_raptor_webext()

if test.get("preferences") is not None:
    self.set_browser_test_prefs(test['preferences'])

# if 'alert_on' was provided in the test INI, add to our config for results/output
self.config['subtest_alert_on'] = test.get('alert_on')

def wait_for_test_finish(self, test, timeout):
    # this is a 'back-stop' i.e. if for some reason Raptor doesn't finish for some
    # serious problem; i.e. the test was unable to send a 'page-timeout' to the control
@@ -728,9 +677,9 @@ class RaptorDesktop(Raptor):
os.mkdir(output_dir)
if not os.path.exists(test_dir):
    os.mkdir(test_dir)
except Exception:
except Exception as e:
    LOG.critical("Could not create directories to store power testing data.")
    raise
    raise e

# Start power measurements with IPG creating a power usage log
# every 30 seconds with 1 data point per second (or a 1000 milli-
@@ -1390,14 +1339,7 @@ def main(args=sys.argv[1:]):
value = outer_kwargs.pop(key)
inner_kwargs[key] = value

if args.app == "firefox":
    klass = Browsertime
elif args.app in CHROMIUM_DISTROS:
    klass = Browsertime
else:
    klass = BrowsertimeAndroid

return klass(*inner_args, **inner_kwargs)
return Browsertime(*inner_args, **inner_kwargs)

raptor = raptor_class(args.app,
                      args.binary,
@@ -9,32 +9,27 @@ from __future__ import absolute_import
import json
import os

from abc import ABCMeta, abstractmethod
from logger.logger import RaptorLogger
from output import RaptorOutput, BrowsertimeOutput
from output import Output

LOG = RaptorLogger(component='perftest-results-handler')
LOG = RaptorLogger(component='raptor-results-handler')


class PerftestResultsHandler(object):
    """Abstract base class to handle perftest results"""
class RaptorResultsHandler():
    """Handle Raptor test results"""

    __metaclass__ = ABCMeta

    def __init__(self, gecko_profile=False, power_test=False,
                 cpu_test=False, memory_test=False, **kwargs):
        self.gecko_profile = gecko_profile
        self.power_test = power_test
        self.cpu_test = cpu_test
        self.memory_test = memory_test
    def __init__(self, config=None):
        self.config = config
        self.results = []
        self.page_timeout_list = []
        self.images = []
        self.supporting_data = None

    @abstractmethod
    def add(self, new_result_json):
        raise NotImplementedError()
        # add to results
        LOG.info("received results in RaptorResultsHandler.add")
        new_result = RaptorTestResult(new_result_json)
        self.results.append(new_result)

    def add_image(self, screenshot, test_name, page_cycle):
        # add to results
@@ -82,7 +77,9 @@ class PerftestResultsHandler(object):
def _get_expected_perfherder(self, output):
    def is_resource_test():
        if self.power_test or self.cpu_test or self.memory_test:
        if self.config.get('power_test', None) or \
                self.config.get('cpu_test', None) or \
                self.config.get('memory_test', None):
            return True
        return False
@@ -109,11 +106,11 @@ class PerftestResultsHandler(object):
# for the regular raptor tests (i.e. speedometer) so we
# expect one per resource-type, starting with 0
expected_perfherder = 0
if self.power_test:
if self.config.get('power_test', None):
    expected_perfherder += 1
if self.memory_test:
if self.config.get('memory_test', None):
    expected_perfherder += 1
if self.cpu_test:
if self.config.get('cpu_test', None):
    expected_perfherder += 1

return expected_perfherder
@@ -152,24 +149,10 @@ class PerftestResultsHandler(object):
    return False
return True

@abstractmethod
def summarize_and_output(self, test_config, tests, test_names):
    raise NotImplementedError()


class RaptorResultsHandler(PerftestResultsHandler):
    """Process Raptor results"""

    def add(self, new_result_json):
        # add to results
        LOG.info("received results in RaptorResultsHandler.add")
        new_result = RaptorTestResult(new_result_json)
        self.results.append(new_result)

    def summarize_and_output(self, test_config, tests, test_names):
    def summarize_and_output(self, test_config, test_names):
        # summarize the result data, write to file and output PERFHERDER_DATA
        LOG.info("summarizing raptor test results")
        output = RaptorOutput(self.results, self.supporting_data, test_config['subtest_alert_on'])
        output = Output(self.results, self.supporting_data, test_config['subtest_alert_on'])
        output.summarize(test_names)
        # that has each browser cycle separate; need to check if there were multiple browser
        # cycles, and if so need to combine results from all cycles into one overall result
@@ -181,7 +164,7 @@ class RaptorResultsHandler(PerftestResultsHandler):
output.summarize_supporting_data()
res, out_sup_perfdata = output.output_supporting_data(test_names)
res, out_perfdata = output.output(test_names)
if not self.gecko_profile:
if not self.config['gecko_profile']:
    # res will remain True if no problems are encountered
    # during schema validation and perferder_data counting
    res = self._validate_treeherder_data(output, out_sup_perfdata + out_perfdata)
@@ -197,221 +180,3 @@ class RaptorTestResult():
|
|||
# convert test result json/dict (from control server) to test result object instance
|
||||
for key, value in test_result_json.iteritems():
|
||||
setattr(self, key, value)
|
||||
|
||||
|
||||
class BrowsertimeResultsHandler(PerftestResultsHandler):
|
||||
"""Process Browsertime results"""
|
||||
def __init__(self, config, root_results_dir=None):
|
||||
super(BrowsertimeResultsHandler, self).__init__(config)
|
||||
self._root_results_dir = root_results_dir
|
||||
|
||||
def result_dir_for_test(self, test):
|
||||
return os.path.join(self._root_results_dir, test['name'])
|
||||
|
||||
def add(self, new_result_json):
|
||||
# not using control server with bt
|
||||
pass
|
||||
|
||||
def parse_browsertime_json(self, raw_btresults):
|
||||
"""
|
||||
Receive a json blob that contains the results direct from the browsertime tool. Parse
|
||||
out the values that we wish to use and add those to our result object. That object will
|
||||
then be further processed in the BrowsertimeOutput class.
|
||||
|
||||
The values that we care about in the browsertime.json are structured as follows.
|
||||
The 'browserScripts' section has one entry for each page-load / browsertime cycle!
|
||||
|
||||
[
|
||||
{
|
||||
"info": {
|
||||
"browsertime": {
|
||||
"version": "4.9.2-android"
|
||||
},
|
||||
"url": "https://www.theguardian.co.uk",
|
||||
},
|
||||
"browserScripts": [
|
||||
{
|
||||
"browser": {
|
||||
"userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:70.0)
|
||||
Gecko/20100101 Firefox/70.0",
|
||||
"windowSize": "1366x694"
|
||||
},
|
||||
"timings": {
|
||||
"firstPaint": 830,
|
||||
"loadEventEnd": 4450,
|
||||
"timeToContentfulPaint": 932,
|
||||
"timeToDomContentFlushed": 864,
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
<repeated for every page-load cycle>
|
||||
},
|
||||
],
|
||||
"statistics": {
|
||||
"timings": {
|
||||
"firstPaint": {
|
||||
"median": 668,
|
||||
"mean": 680,
|
||||
"mdev": 9.6851,
|
||||
"stddev": 48,
|
||||
"min": 616,
|
||||
"p10": 642,
|
||||
"p90": 719,
|
||||
"p99": 830,
|
||||
"max": 830
|
||||
},
|
||||
"loadEventEnd": {
|
||||
"median": 3476,
|
||||
"mean": 3642,
|
||||
"mdev": 111.7028,
|
||||
"stddev": 559,
|
||||
"min": 3220,
|
||||
"p10": 3241,
|
||||
"p90": 4450,
|
||||
"p99": 5818,
|
||||
"max": 5818
|
||||
},
|
||||
"timeToContentfulPaint": {
|
||||
"median": 758,
|
||||
"mean": 769,
|
||||
"mdev": 10.0941,
|
||||
"stddev": 50,
|
||||
"min": 712,
|
||||
"p10": 728,
|
||||
"p90": 810,
|
||||
"p99": 932,
|
||||
"max": 932
|
||||
},
|
||||
"timeToDomContentFlushed": {
|
||||
"median": 670,
|
||||
"mean": 684,
|
||||
"mdev": 11.6768,
|
||||
"stddev": 58,
|
||||
"min": 614,
|
||||
"p10": 632,
|
||||
"p90": 738,
|
||||
"p99": 864,
|
||||
"max": 864
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
"""
|
||||
LOG.info("parsing results from browsertime json")
|
||||
|
||||
# For now, assume that browsertime loads only one site.
|
||||
if len(raw_btresults) != 1:
|
||||
raise ValueError("Browsertime did not measure exactly one site.")
|
||||
(_raw_bt_results,) = raw_btresults
|
||||
|
||||
if not _raw_bt_results['browserScripts']:
|
||||
raise ValueError("Browsertime produced no measurements.")
|
||||
bt_browser = _raw_bt_results['browserScripts'][0]['browser']
|
||||
|
||||
bt_ver = _raw_bt_results['info']['browsertime']['version']
|
||||
bt_url = _raw_bt_results['info']['url'],
|
||||
bt_result = {'bt_ver': bt_ver,
|
||||
'browser': bt_browser,
|
||||
'url': bt_url,
|
||||
'measurements': {},
|
||||
'statistics': {}}
|
||||
|
||||
# bt to raptor names
|
||||
conversion = (('fnbpaint', 'firstPaint'),
|
||||
('fcp', 'timeToContentfulPaint'),
|
||||
('dcf', 'timeToDomContentFlushed'),
|
||||
('loadtime', 'loadEventEnd'))
|
||||
|
||||
# extracting values from browserScripts and statistics
|
||||
for bt, raptor in conversion:
|
||||
# XXX looping several times in the list, could do better
|
||||
bt_result['measurements'][bt] = [cycle['timings'][raptor] for cycle in
|
||||
_raw_bt_results['browserScripts']]
|
||||
|
||||
# let's add the browsertime statistics; we'll use those for overall values instead
|
||||
# of calculating our own based on the replicates
|
||||
bt_result['statistics'][bt] = _raw_bt_results['statistics']['timings'][raptor]
|
||||
|
||||
return bt_result
|
||||
|
||||
def summarize_and_output(self, test_config, tests, test_names):
|
||||
"""
|
||||
Retrieve, process, and output the browsertime test results. Currently supports page-load
|
||||
type tests only.
|
||||
|
||||
The Raptor framework either ran a single page-load test (one URL) - or - an entire suite
|
||||
of page-load tests (multiple test URLs). Regardless, every test URL measured will
|
||||
have its own 'browsertime.json' results file, located in a sub-folder names after the
|
||||
Raptor test name, i.e.:
|
||||
|
||||
browsertime-results/
|
||||
raptor-tp6-amazon-firefox
|
||||
browsertime.json
|
||||
raptor-tp6-facebook-firefox
|
||||
browsertime.json
|
||||
raptor-tp6-google-firefox
|
||||
browsertime.json
|
||||
raptor-tp6-youtube-firefox
|
||||
browsertime.json
|
||||
|
||||
For each test URL that was measured, find the resulting 'browsertime.json' file, and pull
|
||||
out the values that we care about.
|
||||
"""
|
||||
# summarize the browsertime result data, write to file and output PERFHERDER_DATA
|
||||
LOG.info("retrieving browsertime test results")
|
||||
|
||||
for test in tests:
|
||||
bt_res_json = os.path.join(self.result_dir_for_test(test), 'browsertime.json')
|
||||
if os.path.exists(bt_res_json):
|
||||
LOG.info("found browsertime results at %s" % bt_res_json)
|
||||
else:
|
||||
LOG.critical("unable to find browsertime results at %s" % bt_res_json)
|
||||
return False
|
||||
|
||||
try:
|
||||
with open(bt_res_json, 'r') as f:
|
||||
raw_btresults = json.load(f)
|
||||
except Exception as e:
|
||||
LOG.error("Exception reading %s" % bt_res_json)
|
||||
# XXX this should be replaced by a traceback call
|
||||
LOG.error("Exception: %s %s" % (type(e).__name__, str(e)))
|
||||
raise
|
||||
|
||||
new_result = self.parse_browsertime_json(raw_btresults)
|
||||
|
||||
# add additional info not from the browsertime json
|
||||
for field in ('name', 'unit', 'lower_is_better',
|
||||
'alert_threshold', 'cold'):
|
||||
new_result[field] = test[field]
|
||||
|
||||
# Differentiate Raptor `pageload` tests from `browsertime-pageload`
|
||||
# tests while we compare and contrast.
|
||||
new_result['type'] = "browsertime-pageload"
|
||||
|
||||
# All Browsertime measurements are elapsed times in milliseconds.
|
||||
new_result['subtest_lower_is_better'] = True
|
||||
new_result['subtest_unit'] = 'ms'
|
||||
LOG.info("parsed new result: %s" % str(new_result))
|
||||
|
||||
# `extra_options` will be populated with Gecko profiling flags in
|
||||
# the future.
|
||||
new_result['extra_options'] = []
|
||||
|
||||
self.results.append(new_result)
|
||||
|
||||
# now have all results gathered from all browsertime test URLs; format them for output
|
||||
output = BrowsertimeOutput(self.results,
|
||||
self.supporting_data,
|
||||
test_config['subtest_alert_on'])
|
||||
|
||||
output.summarize(test_names)
|
||||
res, out_perfdata = output.output(test_names)
|
||||
|
||||
if not self.gecko_profile:
|
||||
# res will remain True if no problems are encountered
|
||||
# during schema validation and perferder_data counting
|
||||
res = self._validate_treeherder_data(output, out_perfdata)
|
||||
|
||||
return res