# arewefastyet/server/condenser.py
#
# 344 lines
# 10 KiB
# Python

# vim: set ts=4 sw=4 tw=99 et:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import re
import os
import sys
import awfy, util
import math
from profiler import Profiler
from datetime import datetime
import glob
# Length of one day in seconds. split_into_days() adds it to timelist values,
# so those are presumably timestamps in seconds -- TODO confirm.
SecondsPerDay = 60 * 60 * 24
# Used by aggregate(): graphs with at most this many timelist entries are
# returned uncondensed; it is also the number of points one line must have in
# the "recent" window and the number of regions older points are folded into.
MaxRecentRuns = 30
class FolderChanger:
    """Context manager that runs its body inside another working directory.

    The directory that is current when the object is constructed is
    remembered and becomes the working directory again when the ``with``
    block is left.
    """

    def __init__(self, folder):
        # Where to switch to on entry, and where to go back to on exit.
        self.new = folder
        self.old = os.getcwd()

    def __enter__(self):
        """Make the target folder the process working directory."""
        os.chdir(self.new)

    def __exit__(self, type, value, traceback):
        """Return to the saved directory; exceptions are not suppressed."""
        os.chdir(self.old)
def export(name, j):
    """Write ``j`` to the file ``name`` inside ``awfy.path``.

    An existing file of that name is deleted before the new one is created.
    The data is written with ``util.json_dump``.
    """
    target = os.path.join(awfy.path, name)

    # Drop the stale copy first so the output is always a freshly created file.
    if os.path.exists(target):
        os.remove(target)

    with open(target, 'w') as out:
        util.json_dump(j, out)
def find_all_months(cx, prefix, name):
    """List the monthly raw data files of one graph, oldest first.

    Searches ``awfy.path`` for files named
    ``<prefix>raw-<name>-<YYYY>-<M>.json``.

    Args:
        cx: Unused by this function.
        prefix: Literal file name prefix (callers in this file pass values
            such as ``''``, ``'auth-'`` or ``'bk-'``).
        name: Literal graph name embedded in the file name.

    Returns:
        A list of ``((year, month), filename)`` tuples ordered by
        ``year * 12 + month``. File names are relative to ``awfy.path``.
    """
    pattern = prefix + 'raw-' + name + '-*-*.json'

    # The glob is looser than the real naming scheme (a name that is a prefix
    # of another name also matches), so every hit is re-checked with a regex.
    # prefix/name are literal text and must be escaped; the digit classes are
    # written in a raw string so the backslashes reach the regex engine intact.
    month_re = re.compile(re.escape(prefix + 'raw-' + name) + r'-(\d\d\d\d)-(\d+)\.json')

    with FolderChanger(awfy.path):
        files = []
        for filename in glob.glob(pattern):
            m = month_re.match(filename)
            if not m:
                continue
            year = int(m.group(1))
            month = int(m.group(2))
            files.append(((year, month), filename))
        files = sorted(files, key=lambda key: key[0][0] * 12 + key[0][1])
        return files
def retrieve_graphs(cx, files):
    """Load the graph stored in each listed file.

    ``files`` yields ``(when, filename)`` pairs; the result is a list of
    ``(when, graph)`` pairs in the same order, each graph loaded through
    ``retrieve_graph``.
    """
    return [(when, retrieve_graph(cx, path)) for when, path in files]
def retrieve_graph(cx, file):
    """Return the ``'graph'`` entry of a file stored under ``awfy.path``.

    The file is parsed with ``util.json_load``; ``cx`` is not used.
    """
    full_path = os.path.join(awfy.path, file)
    with open(full_path) as source:
        contents = util.json_load(source)
    return contents['graph']
# Take a timelist and split it into index regions, one region per day.
def split_into_days(timelist, day_length=None):
    """Partition the indices of ``timelist`` into day-long regions.

    A new region is opened at the first entry that lies at least
    ``day_length`` after the entry that opened the current region.

    Args:
        timelist: Sequence of ascending numeric times.
        day_length: Length of one day in the unit of ``timelist``. Defaults
            to the module constant ``SecondsPerDay``.

    Returns:
        A list of half-open ``(start, end)`` index pairs (``end`` exclusive,
        which is how ``condense_graph`` iterates them). The regions are
        contiguous and together cover every index; an empty timelist gives
        ``[]``.
    """
    if not timelist:
        return []
    if day_length is None:
        day_length = SecondsPerDay

    days = []
    # The first region starts at index 0 (not None, which range() rejects).
    first = 0
    earliest = timelist[0]
    for i, t in enumerate(timelist):
        if t >= earliest + day_length:
            days.append((first, i))
            first = i
            # Measure the next day from the entry that opened it; otherwise
            # every later entry would start a region of its own.
            earliest = t
    # Close the last region past the final index so the last point is kept.
    days.append((first, len(timelist)))
    return days
# Aggregate the datapoints in a graph into the supplied regions. Line ordering
# stays the same.
def condense_graph(graph, regions):
    """Collapse every region of a graph into a single averaged point.

    Args:
        graph: Dict with ``'direction'``, ``'timelist'`` and ``'lines'``.
            Each line is a dict with ``'modeid'`` and ``'data'``; a data
            entry is either falsy (no point) or a sequence read here at
            index 0 (value that is averaged), 1, 3 and 4.
        regions: Iterable of ``(start, end)`` index pairs. ``end`` is
            exclusive.

    Returns:
        A new graph dict with the same ``'direction'``, one timelist entry
        per region (the time at ``start``) and, for every line, one point per
        region of the form ``[avg, first, last, suite_version, id]``:
        ``avg`` is the mean of the non-empty values (0 when there are none),
        ``first``/``last`` are index 1 of the first/last counted entries,
        ``suite_version`` is index 3 of the last counted entry, and ``id`` is
        index 4 of the entry only when exactly one entry was counted.
    """
    # Prefill the new graph.
    new_graph = {
        'direction': graph['direction'],
        'timelist': [],
        'lines': [],
    }

    for line in graph['lines']:
        points = []
        for start, end in regions:
            total = 0
            count = 0
            first = None
            last = None
            suite_version = None
            run_id = None
            for i in range(start, end):
                p = line['data'][i]
                # Skip missing points and points without a value.
                if not p or not p[0]:
                    continue
                total += p[0]
                count += 1
                if not first:
                    first = p[1]
                last = p[1]
                suite_version = p[3]
                run_id = p[4]
            if count == 0:
                avg = 0
            else:
                avg = total / count
            # An id only identifies a point that was not merged with others.
            # Compare by value: identity comparison of ints is not reliable.
            points.append([avg, first, last, suite_version,
                           run_id if count == 1 else None])
        new_graph['lines'].append({'modeid': line['modeid'], 'data': points})

    for start, end in regions:
        new_graph['timelist'].append(graph['timelist'][start])

    return new_graph
def condense_month(cx, suite, graph, prefix, name):
    """Condense one graph to one point per day and export it.

    The graph's timelist is split into day regions, the graph is condensed
    over those regions, and the result is exported as ``<name>.json``
    together with ``awfy.version``. ``cx``, ``suite`` and ``prefix`` are not
    used here.
    """
    day_regions = split_into_days(graph['timelist'])
    condensed = condense_graph(graph, day_regions)
    payload = {
        'version': awfy.version,
        'graph': condensed,
    }
    export(name + '.json', payload)
def combine(graphs):
    """Concatenate several graphs into one, in the given order.

    The result has one line per distinct ``'modeid'`` (in first-seen order).
    For each input graph a line receives that graph's data, or as many
    ``None`` placeholders as the graph has timelist entries when the graph
    has no line for that mode. The direction is taken from the first graph.

    Raises:
        Exception: ``'corrupt graph'`` if any resulting line does not have
            exactly one entry per combined timelist entry.
    """
    combined = {
        'lines': [],
        'timelist': [],
        'direction': graphs[0]['direction'],
    }

    # First pass: create an (empty) output line for every mode that occurs.
    by_mode = {}
    for graph in graphs:
        for line in graph['lines']:
            modeid = line['modeid']
            if modeid not in by_mode:
                entry = {'modeid': modeid, 'data': []}
                by_mode[modeid] = entry
                combined['lines'].append(entry)

    # Second pass: append each graph's data, padding modes it does not have.
    for graph in graphs:
        seen = set()
        for line in graph['lines']:
            by_mode[line['modeid']]['data'].extend(line['data'])
            seen.add(line['modeid'])

        for modeid, entry in by_mode.items():
            if modeid not in seen:
                entry['data'].extend([None] * len(graph['timelist']))

        combined['timelist'].extend(graph['timelist'])

    # Sanity check.
    expected = len(combined['timelist'])
    if any(len(line['data']) != expected for line in combined['lines']):
        raise Exception('corrupt graph')

    return combined
def _count_recent_runs(graph):
    """Return how many trailing timelist entries make up the recent window.

    Walks the timelist from newest to oldest, counting per line the non-empty
    points seen so far, and stops once one line has MaxRecentRuns of them.
    """
    lines = graph['lines']
    runs = [0] * len(lines)
    recent = 0
    for i in range(len(graph['timelist']) - 1, -1, -1):
        for j, line in enumerate(lines):
            data = line['data']
            if i < len(data) and data[i]:
                runs[j] += 1
        recent += 1
        # `runs` is empty for a graph without lines; max() would raise on it.
        if runs and max(runs) >= MaxRecentRuns:
            break
    return recent


def _historical_regions(historical):
    """Split indices [0, historical) into MaxRecentRuns contiguous regions.

    Regions are half-open ``(start, end)`` pairs, the form condense_graph
    iterates with ``range(start, end)``. Each region ends where the next one
    starts, so no historical point is skipped or counted twice.
    """
    region_length = float(historical) / MaxRecentRuns
    starts = []
    pos = 0
    for _ in range(MaxRecentRuns):
        starts.append(int(round(pos)))
        pos += region_length

    regions = []
    for k, start in enumerate(starts):
        end = starts[k + 1] if k + 1 < len(starts) else historical
        # Never emit an empty region: it would average to a bogus 0 point.
        regions.append((start, max(end, start + 1)))
    return regions


def _with_condensed_history(graph):
    """Return ``graph`` with its older part condensed, if it has enough data.

    Sets ``'earliest'`` to the time of the first uncondensed point (0 for an
    empty graph). Returns the input graph itself when nothing is condensed.
    """
    timelist = graph['timelist']

    # If we don't have enough points for a historical view, we won't display
    # a historical view.
    if len(timelist) <= MaxRecentRuns:
        graph['earliest'] = timelist[0] if timelist else 0
        return graph

    # Show MaxRecentRuns of at least one line.
    recent = _count_recent_runs(graph)

    # If the number of historical points is <= the number of recent points,
    # then the graph is about split so we don't have to do anything.
    historical = len(timelist) - recent
    if historical <= MaxRecentRuns:
        graph['earliest'] = timelist[historical]
        return graph

    new_graph = condense_graph(graph, _historical_regions(historical))
    # Append the recent points untouched after the condensed history.
    for line, oldline in zip(new_graph['lines'], graph['lines']):
        line['data'].extend(oldline['data'][historical:])
    new_graph['timelist'].extend(timelist[historical:])
    new_graph['earliest'] = timelist[historical]
    new_graph['aggregate'] = True

    # Sanity check.
    for line in new_graph['lines']:
        if len(line['data']) != len(new_graph['timelist']):
            raise Exception('corrupt graph')
    return new_graph


def aggregate(cx, suite, prefix, name):
    """Build the aggregate graph for ``name`` from all of its monthly files.

    All raw monthly graphs found by ``find_all_months`` are combined. When
    there are more than MaxRecentRuns timelist entries, the newest runs are
    kept as they are and the older ones are averaged into MaxRecentRuns
    regions.

    Args:
        cx: Passed through to the file helpers.
        suite: Unused by this function.
        prefix: File name prefix of the graph files.
        name: Graph name used in the file names and the progress message.

    Returns:
        A graph dict with ``'aggregate'`` set to True and ``'earliest'`` set
        to the time of the first uncondensed point (0 for an empty graph).

    Raises:
        Exception: ``'corrupt graph'`` if a line and the timelist end up with
            different lengths.
    """
    with Profiler() as p:
        sys.stdout.write('Aggregating ' + name + '... ')
        sys.stdout.flush()

        files = find_all_months(cx, prefix, name)
        graphs = retrieve_graphs(cx, files)
        graph = combine([graph for when, graph in graphs])
        graph['aggregate'] = True

        result = _with_condensed_history(graph)
        diff = p.time()
    # Report the timing on every path so the progress line is always ended.
    print('took ' + diff)
    return result
def file_is_newer(file1, file2):
    """Return True when ``file1`` was modified at or after ``file2``.

    Compares the modification times reported by ``os.path.getmtime``; equal
    times count as newer.
    """
    first_mtime = os.path.getmtime(file1)
    second_mtime = os.path.getmtime(file2)
    return not first_mtime < second_mtime
def condense(cx, suite, prefix, name):
    """Refresh the condensed monthly files of one graph.

    For every raw monthly file of ``name`` the matching
    ``<prefix>condensed-<name>-<year>-<month>.json`` is regenerated unless it
    already exists and is at least as recent as the raw file.

    Returns:
        True if at least one condensed file was regenerated, False otherwise
        (including when there are no raw files at all).
    """
    with Profiler() as p:
        sys.stdout.write('Importing all datapoints for ' + name + '... ')
        sys.stdout.flush()

        files = find_all_months(cx, prefix, name)
        diff = p.time()
    print('took ' + diff)

    if not files:
        return False

    change = False
    for (year, month), raw_file in files:
        condensed_name = prefix + 'condensed-' + name + '-' + str(year) + '-' + str(month)
        condensed_path = os.path.join(awfy.path, condensed_name + '.json')
        raw_path = os.path.join(awfy.path, raw_file)

        # Only update the graph when condensed file is older.
        if os.path.exists(condensed_path) and file_is_newer(condensed_path, raw_path):
            continue

        # There was a datapoint added to one of the condensed files.
        change = True

        with Profiler() as p:
            sys.stdout.write('Condensing ' + condensed_name + '... ')
            sys.stdout.flush()

            month_graph = retrieve_graph(cx, raw_file)
            condense_month(cx, suite, month_graph, prefix, condensed_name)
            diff = p.time()
        print(' took ' + diff)

    return change
def condense_suite(cx, machine, suite):
    """Condense and aggregate one suite (and its tests) for one machine.

    The suite graph is condensed; if anything changed, its aggregate file is
    rebuilt and every test in ``suite.tests`` is condensed and, when changed,
    re-aggregated under the additional ``bk-`` prefix. Files of suites with
    ``suite.visible == 2`` carry the ``auth-`` prefix.

    Returns:
        The ``'graph'`` stored in the suite's aggregate file, read back from
        disk (the file must therefore exist).
    """
    name = suite.name + '-' + str(machine.id)
    prefix = "auth-" if suite.visible == 2 else ""
    aggregated_file = prefix + 'aggregate-' + name + '.json'

    # Condense the suite; aggregate only if something was regenerated.
    if condense(cx, suite, prefix, name):
        suite_payload = {
            'version': awfy.version,
            'graph': aggregate(cx, suite, prefix, name),
        }
        export(aggregated_file, suite_payload)

        # Note: only run the subtest condenser when suite was changed.
        test_prefix = prefix + 'bk-'
        for test_name in suite.tests:
            test_path = suite.name + '-' + test_name + '-' + str(machine.id)

            # Condense the test and aggregate it if needed.
            if condense(cx, suite, test_prefix, test_path):
                test_payload = {
                    'version': awfy.version,
                    'graph': aggregate(cx, suite, test_prefix, test_path),
                }
                export(prefix + 'bk-aggregate-' + test_path + '.json', test_payload)

    return retrieve_graph(cx, aggregated_file)
def condense_all(cx):
    """Condense every suite of every active machine and export the summaries.

    For each machine in ``cx.machines`` (except those with ``active == 2``)
    every benchmark in ``cx.benchmarks`` other than ``'v8'`` is condensed,
    and ``aggregate-<machine id>.json`` is exported with the aggregate graphs
    keyed by suite name. The ``'misc'`` suite and suites with
    ``visible == 2`` are condensed but left out of that summary.
    """
    for machine in cx.machines:
        # If a machine is set to no longer report scores, don't condense it.
        if machine.active == 2:
            continue

        aggregates = {}
        for suite in cx.benchmarks:
            if suite.name == 'v8':
                continue

            suite_aggregate = condense_suite(cx, machine, suite)

            # These suites are still condensed above, just not summarized.
            left_out = suite.name == 'misc' or suite.visible == 2
            if not left_out:
                aggregates[suite.name] = suite_aggregate

        summary = {
            'version': awfy.version,
            'graphs': aggregates,
        }
        export('aggregate-' + str(machine.id) + '.json', summary)