Hannes Verschore 2015-11-12 01:37:29 -08:00
Parent 544284603a 80a63801a3
Commit e573ce7271
3 changed files: 157 additions and 113 deletions

View file

@@ -33,6 +33,10 @@ class DBTable(object):
self.initialized = False
self.cached = None
def exists(self):
self.initialize()
return self.cached != None
def prefetch(self):
if self.table() not in self.__class__.globalcache:
self.__class__.globalcache[self.table()] = {}
@@ -60,7 +64,8 @@ class DBTable(object):
return
self.prefetch()
self.cached = self.__class__.globalcache[self.table()][self.id]
if self.id in self.__class__.globalcache[self.table()]:
self.cached = self.__class__.globalcache[self.table()][self.id]
return
def get(self, field):
@@ -86,7 +91,7 @@ class DBTable(object):
def delete(self):
c = awfy.db.cursor()
c.execute("DELETE FROM "+self.table()+" \
c.execute("DELETE FROM "+self.table()+" \
WHERE id = %s", (self.id, ))
@staticmethod
@@ -246,7 +251,7 @@ class Regression(DBTable):
class RegressionScore(DBTable):
def score(self):
return self.get("score")
return self.get("score")
@staticmethod
def table():
@@ -288,7 +293,7 @@ class RegressionScoreNoise(DBTable):
class RegressionBreakdown(DBTable):
def score(self):
return self.get("breakdown")
return self.get("breakdown")
@staticmethod
def table():
@@ -633,7 +638,7 @@ class Score(RegressionTools):
suite_version_id = %s AND \
status = 1 \
ORDER BY sort_order DESC \
LIMIT 1", (sort_order, machine, mode, suite))
LIMIT "+str(limit), (sort_order, machine, mode, suite))
rows = c.fetchall()
return [Score(row[0]) for row in rows]
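
The hunk at @@ -60,7 +64,8 @@ above guards the global-cache lookup so that an id which was never prefetched simply leaves self.cached as None (which exists() then reports as False) instead of raising a KeyError. A minimal self-contained sketch of that pattern, using illustrative names (CachedRecord, fetch) rather than the real awfy DBTable API:

# Illustrative sketch only: a stripped-down version of the guarded cache
# lookup above. The real DBTable also prefetches rows from MySQL.
class CachedRecord(object):
    globalcache = {}                     # {table_name: {id: row}}

    def __init__(self, id):
        self.id = id
        self.cached = None

    @staticmethod
    def table():
        return "awfy_example"

    def fetch(self):
        rows = self.__class__.globalcache.get(self.table(), {})
        # Only cache the row if it is actually present; an unknown id no
        # longer raises KeyError, it just leaves self.cached untouched.
        if self.id in rows:
            self.cached = rows[self.id]

    def exists(self):
        self.fetch()
        return self.cached is not None

For example, CachedRecord(42).exists() returns False until a row with id 42 is placed in CachedRecord.globalcache["awfy_example"].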

View file

@@ -1,67 +1,118 @@
# vim: set ts=4 sw=4 tw=99 et:
# This class does some dirty work in unifying the structure of the graph.
class Builder:
def __init__(self):
self.lines = []
self.timemap = {}
class LineBuilder:
def __init__(self, mode_id):
self.points = []
self.mode_id = mode_id
self.time_occurence = {}
def addPoint(self, time, first, last, score, suite_version, id):
if not score:
return
def addPoint(self, points, time, first, last, score, suite_version, id):
point = { 'time': time,
'first': first,
'last': None,
'score': score,
'suite_version': suite_version,
'id': id
}
if last:
point['last'] = last
if not time in self.timemap:
self.timemap[time] = [[points, point]]
else:
self.timemap[time].append([points, point])
points.append(point)
self.points.append(point)
if time not in self.time_occurence:
self.time_occurence[time] = 0
self.time_occurence[time] += 1
# Remove any time slice that has no corresponding datapoints.
def prune(self):
empties = []
for key in self.timemap:
empty = True
points = self.timemap[key]
for L in points:
point = L[1]
if point['first']:
empty = False
break
if empty:
for L in points:
L[0].remove(L[1])
empties.append(key)
for key in empties:
del self.timemap[key]
def time_occurences(self):
return self.time_occurence
def finish(self, lines):
self.prune()
def fixup(self, max_occurences):
# Sort the list of points and add 'null' datapoints for every
# occurrence of a timestamp (in max_occurences) that this line
# has no point for.
# Build a sorted list of all time values, then provide a mapping from
# time values back to indexes into this list.
self.timelist = sorted(self.timemap.keys())
for i, t in enumerate(self.timelist):
self.timemap[t] = i
amount_datapoints = sum(max_occurences.values())
# Now we have a canonical list of time points across all lines. Build
# a new point list for each line, such that all lines have the same
# list of points. At this time, we also rewrite points to be lists, as
# this results in smaller exported JSON.
for i, line in enumerate(lines):
# Prefill, so each slot in the line has one point.
newlist = [None] * len(self.timelist)
for point in line['data']:
index = self.timemap[point['time']]
if 'last' in point:
newlist[index] = [point['score'], point['first'], point['last'], point['suite_version'], point['id']]
else:
newlist[index] = [point['score'], point['first'], None, point['suite_version'], point['id']]
point_map = {}
for point in self.points:
if point['time'] not in point_map:
point_map[point['time']] = []
point_map[point['time']].append(point)
line['data'] = newlist
self.points = []
for time in sorted(max_occurences.keys()):
added = 0
if time in point_map:
self.points += point_map[time]
added = len(point_map[time])
self.points += [None] * (max_occurences[time] - added)
self.time_occurence = max_occurences
return
# end class Builder
def _data(self):
data = []
for point in self.points:
if not point:
data.append(None)
else:
data.append([
point['score'],
point['first'],
point['last'],
point['suite_version'],
point['id']
])
return data
def output(self):
return {
'modeid': self.mode_id,
'data': self._data()
}
class GraphBuilder:
def __init__(self, direction):
self.direction = direction
self.lines = []
def newLine(self, mode_id):
line = LineBuilder(mode_id)
self.lines.append(line)
return line
def _calculate_max_occurences(self):
# Returns a dictionary mapping every timestamp seen in any line
# to the maximum number of times it occurs in a single line.
max_occurences = {}
for line in self.lines:
line_occurences = line.time_occurences()
for time in line_occurences:
if time not in max_occurences:
max_occurences[time] = 0
max_occurences[time] = max(max_occurences[time], line_occurences[time])
return max_occurences
def _timelist(self):
if len(self.lines) == 0:
return []
# After fixup, all lines have the same time_occurences.
# Take the first one to create the timelist.
occurences = self.lines[0].time_occurences()
timelist = []
for time in sorted(occurences.keys()):
timelist += [time] * occurences[time]
return timelist
def fixup(self):
max_occurences = self._calculate_max_occurences()
for line in self.lines:
line.fixup(max_occurences)
def output(self):
# Note: fixup() must always be called before output().
return {
'direction': self.direction,
'lines': [line.output() for line in self.lines],
'timelist': self._timelist()
}
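
The commit replaces the single Builder with a LineBuilder per mode and a GraphBuilder that pads all lines to a shared timelist. A small usage sketch (not part of the commit; the timestamps, scores, mode ids and direction value are made up) showing how fixup() fills the gaps with None:

# Illustrative usage of the new builder classes.
graph = GraphBuilder(-1)                 # placeholder for suite.direction

line1 = graph.newLine(1)                 # mode id 1
line1.addPoint(1000, 'abc123', None, 31.4, 5, 101)   # time, first cset, last, score, suite_version, id
line1.addPoint(2000, 'def456', None, 32.0, 5, 102)

line2 = graph.newLine(2)                 # mode id 2, only has the second timestamp
line2.addPoint(2000, 'def456', None, 29.8, 5, 201)

graph.fixup()                            # must run before output()
data = graph.output()

# data['timelist']         == [1000, 2000]
# data['lines'][0]['data'] == [[31.4, 'abc123', None, 5, 101], [32.0, 'def456', None, 5, 102]]
# data['lines'][1]['data'] == [None, [29.8, 'def456', None, 5, 201]]

Because line 2 has no point at time 1000, fixup() pads its data with a None slot so both lines stay aligned with the shared timelist.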

View file

@@ -13,7 +13,7 @@ import os.path
import datetime
import condenser, json
from profiler import Profiler
from builder import Builder
from builder import LineBuilder, GraphBuilder
def export(name, j):
path = os.path.join(awfy.path, name)
@@ -42,7 +42,8 @@ def delete_metadata(prefix, data):
os.remove(name)
def fetch_test_scores(machine_id, suite_id, name,
finish_stamp = (0, "UNIX_TIMESTAMP()")):
finish_stamp = (0, "UNIX_TIMESTAMP()"),
approx_stamp = (0, "UNIX_TIMESTAMP()")):
c = awfy.db.cursor()
query = "SELECT id FROM awfy_suite_test \
WHERE name = %s"
@@ -62,6 +63,8 @@ def fetch_test_scores(machine_id, suite_id, name,
AND t.id in ("+(",".join(suite_ids))+") \
AND r.status > 0 \
AND r.machine = %s \
AND r.approx_stamp >= "+str(approx_stamp[0])+" \
AND r.approx_stamp <= "+str(approx_stamp[1])+" \
AND r.finish_stamp >= "+str(finish_stamp[0])+" \
AND r.finish_stamp <= "+str(finish_stamp[1])+" \
ORDER BY r.sort_order ASC \
@@ -70,7 +73,8 @@ def fetch_test_scores(machine_id, suite_id, name,
return c.fetchall()
def fetch_suite_scores(machine_id, suite_id,
finish_stamp = (0, "UNIX_TIMESTAMP()")):
finish_stamp = (0, "UNIX_TIMESTAMP()"),
approx_stamp = (0, "UNIX_TIMESTAMP()")):
query = "SELECT STRAIGHT_JOIN r.id, r.approx_stamp, b.cset, s.score, b.mode_id, v.id, s.id \
FROM awfy_run r \
JOIN awfy_build b ON r.id = b.run_id \
@@ -79,6 +83,8 @@ def fetch_suite_scores(machine_id, suite_id,
WHERE v.suite_id = %s \
AND r.status > 0 \
AND r.machine = %s \
AND r.approx_stamp >= "+str(approx_stamp[0])+" \
AND r.approx_stamp <= "+str(approx_stamp[1])+" \
AND r.finish_stamp >= "+str(finish_stamp[0])+" \
AND r.finish_stamp <= "+str(finish_stamp[1])+" \
ORDER BY r.sort_order ASC \
@@ -88,7 +94,8 @@ def fetch_suite_scores(machine_id, suite_id,
return c.fetchall()
def delete_cache(prefix):
os.remove(os.path.join(awfy.path, prefix + '.json'))
if os.path.exists(os.path.join(awfy.path, prefix + '.json')):
os.remove(os.path.join(awfy.path, prefix + '.json'))
def open_cache(suite, prefix):
try:
@@ -125,30 +132,18 @@ def update_cache(cx, suite, prefix, when, rows):
line.append(row)
# Build our actual datasets.
lines = [ ]
builder = Builder()
graph = GraphBuilder(suite.direction)
for modeid in modes:
rows = modes[modeid]
points = []
for row in rows:
score = float(row[3])
if score:
cset = row[2]
else:
cset = None
builder.addPoint(points,
int(row[1]),
cset,
None,
score,
row[5],
row[6])
line = { 'modeid': modeid,
'data': points
}
lines.append(line)
builder.prune()
builder.finish(lines)
line = graph.newLine(modeid)
for row in modes[modeid]:
line.addPoint(int(row[1]), # time
row[2], # cset (first)
None, # None (last)
float(row[3]), # score
row[5], # suite_version
row[6]) # id
graph.fixup()
new_data = graph.output()
# Open the old cache.
cache = open_cache(suite, prefix)
@@ -158,21 +153,15 @@ def update_cache(cx, suite, prefix, when, rows):
for i, oldline in enumerate(cache['lines']):
cache_modes[int(oldline['modeid'])] = oldline
# Updating fails if there are items before the last time in the cache.
if len(cache['timelist']) and len(builder.timelist):
last_time = cache['timelist'][-1]
i = 0
while i < len(builder.timelist) and builder.timelist[i] < last_time:
# Check that the new datapoints only extend the end of the cache.
# Otherwise report failure so the cache gets fully renewed.
if len(cache['timelist']) and len(new_data['timelist']):
if new_data['timelist'][0] < cache['timelist'][-1]:
return False
if i:
builder.timelist = builder.timelist[i:]
for line in lines:
line['data'] = line['data'][i:]
# For any of our lines that are not in the cache, prepend null points so
# the line width matches the existing lines.
for line in lines:
for line in new_data['lines']:
if line['modeid'] in cache_modes:
continue
@@ -183,7 +172,7 @@ def update_cache(cx, suite, prefix, when, rows):
cache_modes[line['modeid']] = data
# Now we can merge our data into the existing graph.
for line in lines:
for line in new_data['lines']:
oldline = cache_modes[line['modeid']]
oldline['data'].extend(line['data'])
@@ -193,22 +182,22 @@ def update_cache(cx, suite, prefix, when, rows):
modeid = int(oldline['modeid'])
if modeid in modes:
continue
oldline['data'].extend([None] * len(builder.timelist))
oldline['data'].extend([None] * len(new_data['timelist']))
# Finally we can extend the cache timelist.
cache['timelist'].extend(builder.timelist)
cache['timelist'].extend(new_data['timelist'])
# Sanity check.
for line in cache['lines']:
if len(line['data']) != len(cache['timelist']):
print(str(len(line['data'])) + ' != ' + str(len(cache['timelist'])))
print len(line['data']), ' != ', len(cache['timelist'])
raise Exception('computed datapoints wrong')
# Now save the results.
save_cache(prefix, cache)
return True
def renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch):
def renew_cache(cx, machine, suite, prefix, when, fetch):
delete_cache(prefix + '-' + str(when[0]) + '-' + str(when[1]));
# Delete corresponding condensed graph
@@ -225,19 +214,18 @@ def renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch):
next_year += 1
dt = datetime.datetime(year=next_year, month=next_month, day=1)
stop_stamp = int(time.mktime(dt.timetuple())) - 1
if last_stamp < stop_stamp:
stop_stamp = last_stamp
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
# Querying all information from this month.
sys.stdout.write('Querying ' + prefix + '... ')
sys.stdout.write('Fetching monthly info ' + name + '... ')
sys.stdout.flush()
with Profiler() as p:
rows = fetch(machine, finish_stamp=(start_stamp,stop_stamp))
rows = fetch(machine, approx_stamp=(start_stamp,stop_stamp))
diff = p.time()
new_rows = len(rows)
print('found ' + str(new_rows) + ' rows in ' + diff)
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
update_cache(cx, suite, name, when, rows)
def perform_update(cx, machine, suite, prefix, fetch):
@@ -246,7 +234,7 @@ def perform_update(cx, machine, suite, prefix, fetch):
last_stamp = metadata['last_stamp']
current_stamp = int(time.time())
sys.stdout.write('Querying ' + prefix + '... ')
sys.stdout.write('Querying for new rows ' + prefix + '... ')
sys.stdout.flush()
with Profiler() as p:
rows = fetch(machine, finish_stamp=(last_stamp+1, current_stamp))
@@ -279,12 +267,12 @@ def perform_update(cx, machine, suite, prefix, fetch):
for when, data in months:
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
sys.stdout.write('Updating cache for ' + name + '...')
sys.stdout.flush()
with Profiler() as p:
if not update_cache(cx, suite, name, when, data):
renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch)
renew_cache(cx, machine, suite, prefix, when, fetch)
diff = p.time()
sys.stdout.write('Updating cache for ' + name + '...')
sys.stdout.flush()
print('took ' + diff)
metadata['last_stamp'] = current_stamp
@@ -294,8 +282,8 @@ def perform_update(cx, machine, suite, prefix, fetch):
# Done
def update(cx, machine, suite):
def fetch_aggregate(machine, finish_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_suite_scores(machine.id, suite.id, finish_stamp)
def fetch_aggregate(machine, finish_stamp = (0,"UNIX_TIMESTAMP()"), approx_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_suite_scores(machine.id, suite.id, finish_stamp, approx_stamp)
prefix = ""
if suite.visible == 2:
@@ -310,8 +298,8 @@ def update(cx, machine, suite):
return
for test_name in suite.tests:
def fetch_test(machine, finish_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_test_scores(machine.id, suite.id, test_name, finish_stamp)
def fetch_test(machine, finish_stamp = (0,"UNIX_TIMESTAMP()"), approx_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_test_scores(machine.id, suite.id, test_name, finish_stamp, approx_stamp)
prefix = ""
if suite.visible == 2:
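
The updater changes above add a second time filter to the fetch helpers: perform_update() keeps selecting newly recorded runs by finish_stamp, while renew_cache() now selects a whole month of runs by approx_stamp (the time a run is plotted at) instead of capping at last_stamp. A minimal sketch of the idea, with placeholder values and a stripped-down filter builder rather than the real fetch_suite_scores query:

import time

# Illustrative sketch (not the actual awfy query code) of the two
# independent time ranges. Both default to an open-ended interval and a
# caller narrows whichever one it cares about.
def score_filter(finish_stamp=(0, "UNIX_TIMESTAMP()"),
                 approx_stamp=(0, "UNIX_TIMESTAMP()")):
    return ("AND r.approx_stamp >= " + str(approx_stamp[0]) + " " +
            "AND r.approx_stamp <= " + str(approx_stamp[1]) + " " +
            "AND r.finish_stamp >= " + str(finish_stamp[0]) + " " +
            "AND r.finish_stamp <= " + str(finish_stamp[1]))

now = int(time.time())
last_stamp = now - 3600                               # placeholder
month_start, month_end = 1446336000, 1448927999       # November 2015, as an example

# perform_update(): incremental update, only runs recorded since the
# updater last ran.
print(score_filter(finish_stamp=(last_stamp + 1, now)))

# renew_cache(): full month rebuild, every run plotted inside that month,
# regardless of when it finished.
print(score_filter(approx_stamp=(month_start, month_end)))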