Hannes Verschore 2015-11-12 01:37:29 -08:00
Parent 544284603a 80a63801a3
Commit e573ce7271
3 changed files: 157 additions and 113 deletions

View file

@@ -33,6 +33,10 @@ class DBTable(object):
self.initialized = False
self.cached = None
def exists(self):
self.initialize()
return self.cached != None
def prefetch(self):
if self.table() not in self.__class__.globalcache:
self.__class__.globalcache[self.table()] = {}
@@ -60,7 +64,8 @@ class DBTable(object):
return
self.prefetch()
self.cached = self.__class__.globalcache[self.table()][self.id]
if self.id in self.__class__.globalcache[self.table()]:
self.cached = self.__class__.globalcache[self.table()][self.id]
return
def get(self, field):
@@ -86,7 +91,7 @@ class DBTable(object):
def delete(self):
c = awfy.db.cursor()
c.execute("DELETE FROM "+self.table()+" \
c.execute("DELETE FROM "+self.table()+" \
WHERE id = %s", (self.id, ))
@staticmethod
@@ -246,7 +251,7 @@ class Regression(DBTable):
class RegressionScore(DBTable):
def score(self):
return self.get("score")
return self.get("score")
@staticmethod
def table():
@@ -288,7 +293,7 @@ class RegressionScoreNoise(DBTable):
class RegressionBreakdown(DBTable):
def score(self):
return self.get("breakdown")
return self.get("breakdown")
@staticmethod
def table():
@@ -633,7 +638,7 @@ class Score(RegressionTools):
suite_version_id = %s AND \
status = 1 \
ORDER BY sort_order DESC \
LIMIT 1", (sort_order, machine, mode, suite))
LIMIT "+str(limit), (sort_order, machine, mode, suite))
rows = c.fetchall()
return [Score(row[0]) for row in rows]
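
The hunk at @@ -60,7 +64,8 @@ above guards the global-cache lookup so that an id which was never prefetched simply leaves self.cached as None (which exists() then reports as False) instead of raising a KeyError. A minimal self-contained sketch of that pattern, using illustrative names (CachedRecord, fetch) rather than the real awfy DBTable API:

# Illustrative sketch only: a stripped-down version of the guarded cache
# lookup above. The real DBTable also prefetches rows from MySQL.
class CachedRecord(object):
    globalcache = {}                     # {table_name: {id: row}}

    def __init__(self, id):
        self.id = id
        self.cached = None

    @staticmethod
    def table():
        return "awfy_example"

    def fetch(self):
        rows = self.__class__.globalcache.get(self.table(), {})
        # Only cache the row if it is actually present; an unknown id no
        # longer raises KeyError, it just leaves self.cached untouched.
        if self.id in rows:
            self.cached = rows[self.id]

    def exists(self):
        self.fetch()
        return self.cached is not None

For example, CachedRecord(42).exists() returns False until a row with id 42 is placed in CachedRecord.globalcache["awfy_example"].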

View file

@@ -1,67 +1,118 @@
# vim: set ts=4 sw=4 tw=99 et:
# This class does some dirty work in unifying the structure of the graph.
class Builder:
def __init__(self):
self.lines = []
self.timemap = {}
class LineBuilder:
def __init__(self, mode_id):
self.points = []
self.mode_id = mode_id
self.time_occurence = {}
def addPoint(self, time, first, last, score, suite_version, id):
if not score:
return
def addPoint(self, points, time, first, last, score, suite_version, id):
point = { 'time': time,
'first': first,
'last': None,
'score': score,
'suite_version': suite_version,
'id': id
}
if last:
point['last'] = last
if not time in self.timemap:
self.timemap[time] = [[points, point]]
else:
self.timemap[time].append([points, point])
points.append(point)
self.points.append(point)
if time not in self.time_occurence:
self.time_occurence[time] = 0
self.time_occurence[time] += 1
# Remove any time slice that has no corresponding datapoints.
def prune(self):
empties = []
for key in self.timemap:
empty = True
points = self.timemap[key]
for L in points:
point = L[1]
if point['first']:
empty = False
break
if empty:
for L in points:
L[0].remove(L[1])
empties.append(key)
for key in empties:
del self.timemap[key]
def time_occurences(self):
return self.time_occurence
def finish(self, lines):
self.prune()
def fixup(self, max_occurences):
# Sort the list of points and add 'null' datapoints for every
# occurrence of a timestamp (in max_occurences) that this line
# has no point for.
# Build a sorted list of all time values, then provide a mapping from
# time values back to indexes into this list.
self.timelist = sorted(self.timemap.keys())
for i, t in enumerate(self.timelist):
self.timemap[t] = i
amount_datapoints = sum(max_occurences.values())
# Now we have a canonical list of time points across all lines. Build
# a new point list for each line, such that all lines have the same
# list of points. At this time, we also rewrite points to be lists, as
# this results in smaller exported JSON.
for i, line in enumerate(lines):
# Prefill, so each slot in the line has one point.
newlist = [None] * len(self.timelist)
for point in line['data']:
index = self.timemap[point['time']]
if 'last' in point:
newlist[index] = [point['score'], point['first'], point['last'], point['suite_version'], point['id']]
else:
newlist[index] = [point['score'], point['first'], None, point['suite_version'], point['id']]
point_map = {}
for point in self.points:
if point['time'] not in point_map:
point_map[point['time']] = []
point_map[point['time']].append(point)
line['data'] = newlist
self.points = []
for time in sorted(max_occurences.keys()):
added = 0
if time in point_map:
self.points += point_map[time]
added = len(point_map[time])
self.points += [None] * (max_occurences[time] - added)
self.time_occurence = max_occurences
return
# end class Builder
def _data(self):
data = []
for point in self.points:
if not point:
data.append(None)
else:
data.append([
point['score'],
point['first'],
point['last'],
point['suite_version'],
point['id']
])
return data
def output(self):
return {
'modeid': self.mode_id,
'data': self._data()
}
class GraphBuilder:
def __init__(self, direction):
self.direction = direction
self.lines = []
def newLine(self, mode_id):
line = LineBuilder(mode_id)
self.lines.append(line)
return line
def _calculate_max_occurences(self):
# Returns a dictionary mapping every timestamp seen in any line
# to the maximum number of times it occurs in a single line.
max_occurences = {}
for line in self.lines:
line_occurences = line.time_occurences()
for time in line_occurences:
if time not in max_occurences:
max_occurences[time] = 0
max_occurences[time] = max(max_occurences[time], line_occurences[time])
return max_occurences
def _timelist(self):
if len(self.lines) == 0:
return []
# After fixup, all lines have the same time_occurences.
# Take the first one to create the timelist.
occurences = self.lines[0].time_occurences()
timelist = []
for time in sorted(occurences.keys()):
timelist += [time] * occurences[time]
return timelist
def fixup(self):
max_occurences = self._calculate_max_occurences()
for line in self.lines:
line.fixup(max_occurences)
def output(self):
# Note: fixup() must always be called before output().
return {
'direction': self.direction,
'lines': [line.output() for line in self.lines],
'timelist': self._timelist()
}
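
The commit replaces the single Builder with a LineBuilder per mode and a GraphBuilder that pads all lines to a shared timelist. A small usage sketch (not part of the commit; the timestamps, scores, mode ids and direction value are made up) showing how fixup() fills the gaps with None:

# Illustrative usage of the new builder classes.
graph = GraphBuilder(-1)                 # placeholder for suite.direction

line1 = graph.newLine(1)                 # mode id 1
line1.addPoint(1000, 'abc123', None, 31.4, 5, 101)   # time, first cset, last, score, suite_version, id
line1.addPoint(2000, 'def456', None, 32.0, 5, 102)

line2 = graph.newLine(2)                 # mode id 2, only has the second timestamp
line2.addPoint(2000, 'def456', None, 29.8, 5, 201)

graph.fixup()                            # must run before output()
data = graph.output()

# data['timelist']         == [1000, 2000]
# data['lines'][0]['data'] == [[31.4, 'abc123', None, 5, 101], [32.0, 'def456', None, 5, 102]]
# data['lines'][1]['data'] == [None, [29.8, 'def456', None, 5, 201]]

Because line 2 has no point at time 1000, fixup() pads its data with a None slot so both lines stay aligned with the shared timelist.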

View file

@@ -13,7 +13,7 @@ import os.path
import datetime
import condenser, json
from profiler import Profiler
from builder import Builder
from builder import LineBuilder, GraphBuilder
def export(name, j):
path = os.path.join(awfy.path, name)
@@ -42,7 +42,8 @@ def delete_metadata(prefix, data):
os.remove(name)
def fetch_test_scores(machine_id, suite_id, name,
finish_stamp = (0, "UNIX_TIMESTAMP()")):
finish_stamp = (0, "UNIX_TIMESTAMP()"),
approx_stamp = (0, "UNIX_TIMESTAMP()")):
c = awfy.db.cursor()
query = "SELECT id FROM awfy_suite_test \
WHERE name = %s"
@@ -62,6 +63,8 @@ def fetch_test_scores(machine_id, suite_id, name,
AND t.id in ("+(",".join(suite_ids))+") \
AND r.status > 0 \
AND r.machine = %s \
AND r.approx_stamp >= "+str(approx_stamp[0])+" \
AND r.approx_stamp <= "+str(approx_stamp[1])+" \
AND r.finish_stamp >= "+str(finish_stamp[0])+" \
AND r.finish_stamp <= "+str(finish_stamp[1])+" \
ORDER BY r.sort_order ASC \
@@ -70,7 +73,8 @@ def fetch_test_scores(machine_id, suite_id, name,
return c.fetchall()
def fetch_suite_scores(machine_id, suite_id,
finish_stamp = (0, "UNIX_TIMESTAMP()")):
finish_stamp = (0, "UNIX_TIMESTAMP()"),
approx_stamp = (0, "UNIX_TIMESTAMP()")):
query = "SELECT STRAIGHT_JOIN r.id, r.approx_stamp, b.cset, s.score, b.mode_id, v.id, s.id \
FROM awfy_run r \
JOIN awfy_build b ON r.id = b.run_id \
@@ -79,6 +83,8 @@ def fetch_suite_scores(machine_id, suite_id,
WHERE v.suite_id = %s \
AND r.status > 0 \
AND r.machine = %s \
AND r.approx_stamp >= "+str(approx_stamp[0])+" \
AND r.approx_stamp <= "+str(approx_stamp[1])+" \
AND r.finish_stamp >= "+str(finish_stamp[0])+" \
AND r.finish_stamp <= "+str(finish_stamp[1])+" \
ORDER BY r.sort_order ASC \
@@ -88,7 +94,8 @@ def fetch_suite_scores(machine_id, suite_id,
return c.fetchall()
def delete_cache(prefix):
os.remove(os.path.join(awfy.path, prefix + '.json'))
if os.path.exists(os.path.join(awfy.path, prefix + '.json')):
os.remove(os.path.join(awfy.path, prefix + '.json'))
def open_cache(suite, prefix):
try:
@@ -125,30 +132,18 @@ def update_cache(cx, suite, prefix, when, rows):
line.append(row)
# Build our actual datasets.
lines = [ ]
builder = Builder()
graph = GraphBuilder(suite.direction)
for modeid in modes:
rows = modes[modeid]
points = []
for row in rows:
score = float(row[3])
if score:
cset = row[2]
else:
cset = None
builder.addPoint(points,
int(row[1]),
cset,
None,
score,
row[5],
row[6])
line = { 'modeid': modeid,
'data': points
}
lines.append(line)
builder.prune()
builder.finish(lines)
line = graph.newLine(modeid)
for row in modes[modeid]:
line.addPoint(int(row[1]), # time
row[2], # cset (first)
None, # None (last)
float(row[3]), # score
row[5], # suite_version
row[6]) # id
graph.fixup()
new_data = graph.output()
# Open the old cache.
cache = open_cache(suite, prefix)
@@ -158,21 +153,15 @@ def update_cache(cx, suite, prefix, when, rows):
for i, oldline in enumerate(cache['lines']):
cache_modes[int(oldline['modeid'])] = oldline
# Updating fails if there are items before the last time in the cache.
if len(cache['timelist']) and len(builder.timelist):
last_time = cache['timelist'][-1]
i = 0
while i < len(builder.timelist) and builder.timelist[i] < last_time:
# Check that the new datapoints only extend the end of the cache.
# Otherwise report failure so the cache gets fully renewed.
if len(cache['timelist']) and len(new_data['timelist']):
if new_data['timelist'][0] < cache['timelist'][-1]:
return False
if i:
builder.timelist = builder.timelist[i:]
for line in lines:
line['data'] = line['data'][i:]
# For any of our lines that are not in the cache, prepend null points so
# the line width matches the existing lines.
for line in lines:
for line in new_data['lines']:
if line['modeid'] in cache_modes:
continue
@@ -183,7 +172,7 @@ def update_cache(cx, suite, prefix, when, rows):
cache_modes[line['modeid']] = data
# Now we can merge our data into the existing graph.
for line in lines:
for line in new_data['lines']:
oldline = cache_modes[line['modeid']]
oldline['data'].extend(line['data'])
@@ -193,22 +182,22 @@ def update_cache(cx, suite, prefix, when, rows):
modeid = int(oldline['modeid'])
if modeid in modes:
continue
oldline['data'].extend([None] * len(builder.timelist))
oldline['data'].extend([None] * len(new_data['timelist']))
# Finally we can extend the cache timelist.
cache['timelist'].extend(builder.timelist)
cache['timelist'].extend(new_data['timelist'])
# Sanity check.
for line in cache['lines']:
if len(line['data']) != len(cache['timelist']):
print(str(len(line['data'])) + ' != ' + str(len(cache['timelist'])))
print len(line['data']), ' != ', len(cache['timelist'])
raise Exception('computed datapoints wrong')
# Now save the results.
save_cache(prefix, cache)
return True
def renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch):
def renew_cache(cx, machine, suite, prefix, when, fetch):
delete_cache(prefix + '-' + str(when[0]) + '-' + str(when[1]));
# Delete corresponding condensed graph
@@ -225,19 +214,18 @@ def renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch):
next_year += 1
dt = datetime.datetime(year=next_year, month=next_month, day=1)
stop_stamp = int(time.mktime(dt.timetuple())) - 1
if last_stamp < stop_stamp:
stop_stamp = last_stamp
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
# Querying all information from this month.
sys.stdout.write('Querying ' + prefix + '... ')
sys.stdout.write('Fetching monthly info ' + name + '... ')
sys.stdout.flush()
with Profiler() as p:
rows = fetch(machine, finish_stamp=(start_stamp,stop_stamp))
rows = fetch(machine, approx_stamp=(start_stamp,stop_stamp))
diff = p.time()
new_rows = len(rows)
print('found ' + str(new_rows) + ' rows in ' + diff)
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
update_cache(cx, suite, name, when, rows)
def perform_update(cx, machine, suite, prefix, fetch):
@@ -246,7 +234,7 @@ def perform_update(cx, machine, suite, prefix, fetch):
last_stamp = metadata['last_stamp']
current_stamp = int(time.time())
sys.stdout.write('Querying ' + prefix + '... ')
sys.stdout.write('Querying for new rows ' + prefix + '... ')
sys.stdout.flush()
with Profiler() as p:
rows = fetch(machine, finish_stamp=(last_stamp+1, current_stamp))
@@ -279,12 +267,12 @@ def perform_update(cx, machine, suite, prefix, fetch):
for when, data in months:
name = prefix + '-' + str(when[0]) + '-' + str(when[1])
sys.stdout.write('Updating cache for ' + name + '...')
sys.stdout.flush()
with Profiler() as p:
if not update_cache(cx, suite, name, when, data):
renew_cache(cx, machine, suite, prefix, when, last_stamp, fetch)
renew_cache(cx, machine, suite, prefix, when, fetch)
diff = p.time()
sys.stdout.write('Updating cache for ' + name + '...')
sys.stdout.flush()
print('took ' + diff)
metadata['last_stamp'] = current_stamp
@@ -294,8 +282,8 @@ def perform_update(cx, machine, suite, prefix, fetch):
# Done
def update(cx, machine, suite):
def fetch_aggregate(machine, finish_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_suite_scores(machine.id, suite.id, finish_stamp)
def fetch_aggregate(machine, finish_stamp = (0,"UNIX_TIMESTAMP()"), approx_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_suite_scores(machine.id, suite.id, finish_stamp, approx_stamp)
prefix = ""
if suite.visible == 2:
@@ -310,8 +298,8 @@ def update(cx, machine, suite):
return
for test_name in suite.tests:
def fetch_test(machine, finish_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_test_scores(machine.id, suite.id, test_name, finish_stamp)
def fetch_test(machine, finish_stamp = (0,"UNIX_TIMESTAMP()"), approx_stamp = (0,"UNIX_TIMESTAMP()")):
return fetch_test_scores(machine.id, suite.id, test_name, finish_stamp, approx_stamp)
prefix = ""
if suite.visible == 2:
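
The updater changes above add a second time filter to the fetch helpers: perform_update() keeps selecting newly recorded runs by finish_stamp, while renew_cache() now selects a whole month of runs by approx_stamp (the time a run is plotted at) instead of capping at last_stamp. A minimal sketch of the idea, with placeholder values and a stripped-down filter builder rather than the real fetch_suite_scores query:

import time

# Illustrative sketch (not the actual awfy query code) of the two
# independent time ranges. Both default to an open-ended interval and a
# caller narrows whichever one it cares about.
def score_filter(finish_stamp=(0, "UNIX_TIMESTAMP()"),
                 approx_stamp=(0, "UNIX_TIMESTAMP()")):
    return ("AND r.approx_stamp >= " + str(approx_stamp[0]) + " " +
            "AND r.approx_stamp <= " + str(approx_stamp[1]) + " " +
            "AND r.finish_stamp >= " + str(finish_stamp[0]) + " " +
            "AND r.finish_stamp <= " + str(finish_stamp[1]))

now = int(time.time())
last_stamp = now - 3600                               # placeholder
month_start, month_end = 1446336000, 1448927999       # November 2015, as an example

# perform_update(): incremental update, only runs recorded since the
# updater last ran.
print(score_filter(finish_stamp=(last_stamp + 1, now)))

# renew_cache(): full month rebuild, every run plotted inside that month,
# regardless of when it finished.
print(score_filter(approx_stamp=(month_start, month_end)))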