From e1077aca41f6e69659803802ac2dfda05c142eff Mon Sep 17 00:00:00 2001
From: William Lachance
Date: Mon, 15 Jun 2015 16:44:00 -0400
Subject: [PATCH] Bug 1174877 - Updates to make perf alert analysis code useful for perfherder

* Using machine history is now optional (perfherder doesn't track it, and we
  don't think we need it)
* Performance datums and analyze_t take keyword arguments to make the API
  more intuitive
* Various other minor updates to make the code easier to understand
---
 tests/perfalert/test_analyze.py            |  14 ++-
 treeherder/perfalert/perfalert/__init__.py | 104 ++++++++++++---------
 2 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/tests/perfalert/test_analyze.py b/tests/perfalert/test_analyze.py
index b75ac0126..bb8c0fe9e 100644
--- a/tests/perfalert/test_analyze.py
+++ b/tests/perfalert/test_analyze.py
@@ -37,15 +37,16 @@ class TestTalosAnalyzer(unittest.TestCase):
     def get_data(self):
         times = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
         values = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
-        return [PerfDatum(t, t, t, float(v), t, t) for t, v in zip(times, values)]
+        return [PerfDatum(t, float(v)) for t, v in zip(times, values)]
 
     def test_analyze_t(self):
         a = TalosAnalyzer()
         data = self.get_data()
         a.addData(data)
-
-        result = [(d.time, d.state) for d in a.analyze_t(5, 5, 2, 15, 5)]
+        result = [(d.push_timestamp, d.state) for d in
+                  a.analyze_t(back_window=5, fore_window=5, t_threshold=2,
+                              machine_threshold=15, machine_history_size=5)]
 
         self.assertEqual(result, [
             (1, 'good'),
             (2, 'good'),
@@ -78,13 +79,16 @@ class TestTalosAnalyzer(unittest.TestCase):
             payload = SampleData.get_perf_data(os.path.join('graphs', filename))
             runs = payload['test_runs']
-            data = [PerfDatum(r[0], r[6], r[2], r[3], r[1][1], r[2], r[1][2]) for r in runs]
+            data = [PerfDatum(r[2], r[3], testrun_id=r[0], machine_id=r[6],
+                              testrun_timestamp=r[2], buildid=r[1][1],
+                              revision=r[1][2]) for r in runs]
 
             a = TalosAnalyzer()
             a.addData(data)
             results = a.analyze_t(BACK_WINDOW, FORE_WINDOW, THRESHOLD,
                                   MACHINE_THRESHOLD, MACHINE_HISTORY_SIZE)
-            regression_timestamps = [d.timestamp for d in results if d.state == 'regression']
+            regression_timestamps = [d.testrun_timestamp for d in results if
+                                     d.state == 'regression']
             self.assertEqual(regression_timestamps, expected_timestamps)
 
 
 if __name__ == '__main__':
diff --git a/treeherder/perfalert/perfalert/__init__.py b/treeherder/perfalert/perfalert/__init__.py
index 0e748f882..44a4ead23 100644
--- a/treeherder/perfalert/perfalert/__init__.py
+++ b/treeherder/perfalert/perfalert/__init__.py
@@ -61,27 +61,30 @@ def calc_t(w1, w2, weight_fn=None):
 
 
 class PerfDatum(object):
-
-    __slots__ = ('testrun_id', 'machine_id', 'timestamp', 'value', 'buildid',
-        'time', 'revision', 'run_number', 'last_other', 'historical_stats',
-        'forward_stats', 't', 'state')
-
-    def __init__(self, testrun_id, machine_id, timestamp, value, buildid, time,
+    def __init__(self, push_timestamp, value, testrun_timestamp=None,
+                 buildid=None, testrun_id=None, machine_id=None,
                  revision=None, state='good'):
+        # Date code was pushed
+        self.push_timestamp = push_timestamp
+        # Value of this point
+        self.value = value
+
+        # Which build was this
+        self.buildid = buildid
+        # Timestamp when test was run
+        if testrun_timestamp:
+            self.testrun_timestamp = testrun_timestamp
+        else:
+            # in some cases we may not have information on when the test was
+            # run; in that case just pretend it's the same as when it was pushed
+            self.testrun_timestamp = push_timestamp
         # Which test run was this
         self.testrun_id = testrun_id
         # Which machine is this
         self.machine_id = machine_id
-        # Talos timestamp
-        self.timestamp = timestamp
-        # Value of this point
-        self.value = value
-        # Which build was this
-        self.buildid = buildid
-        # Date code was pushed
-        self.time = time
         # What revision this data is for
         self.revision = revision
+
         # t-test score
         self.t = 0
         # Whether a machine issue or perf regression is found
@@ -89,24 +92,28 @@ class PerfDatum(object):
 
     def __cmp__(self, o):
         return cmp(
-            (self.time, self.timestamp),
-            (o.time, o.timestamp),
+            (self.push_timestamp, self.testrun_timestamp),
+            (o.push_timestamp, o.testrun_timestamp),
         )
 
     def __eq__(self, o):
         return cmp(
-            (self.timestamp, self.value, self.buildid, self.machine_id),
-            (o.timestamp, o.value, o.buildid, o.machine_id),
+            (self.testrun_timestamp, self.value, self.buildid, self.machine_id),
+            (o.testrun_timestamp, o.value, o.buildid, o.machine_id),
         ) == 0
 
     def __ne__(self, o):
         return not self == o
 
     def __repr__(self):
-        return "<%s: %.3f, %i, %s>" % (self.buildid, self.value, self.timestamp, self.machine_id)
+        return "<%s: %.3f, %i, %s>" % (self.buildid, self.value,
+                                       self.testrun_timestamp, self.machine_id)
 
     def __str__(self):
-        return "Build %s on %s %s %s %s" % (self.buildid, self.timestamp, self.time, self.value, self.machine_id)
+        return "Build %s on %s %s %s %s" % (self.buildid,
+                                            self.testrun_timestamp,
+                                            self.push_timestamp, self.value,
+                                            self.machine_id)
 
 
 class TalosAnalyzer:
@@ -124,9 +131,11 @@ class TalosAnalyzer:
         for d in self.machine_history.values():
             d.sort()
 
-    def analyze_t(self, j, k, threshold, machine_threshold, machine_history_size):
+    def analyze_t(self, back_window=12, fore_window=12, t_threshold=7,
+                  machine_threshold=None, machine_history_size=None):
         # Use T-Tests
         # Analyze test data using T-Tests, comparing data[i-j:i] to data[i:i+k]
+        (j, k) = (back_window, fore_window)
         good_data = []
 
         num_points = len(self.data) - k + 1
@@ -139,17 +148,6 @@ class TalosAnalyzer:
             # start of the window.
             jw.reverse()
 
-            my_history = self.machine_history[di.machine_id]
-            my_history_index = my_history.index(di)
-            my_data = [d.value for d in self.machine_history[di.machine_id][my_history_index-machine_history_size+1:my_history_index+1]]
-            other_data = []
-            l = len(good_data)-1
-            while len(other_data) < k*2 and l > 0:
-                dl = good_data[l]
-                if dl.machine_id != di.machine_id:
-                    other_data.insert(0, dl.value)
-                l -= 1
-
             di.historical_stats = analyze(jw)
             di.forward_stats = analyze(kw)
 
@@ -159,30 +157,44 @@ class TalosAnalyzer:
                 # Assume it's ok, we don't have enough data
                 di.t = 0
 
-            if len(other_data) >= k*2 and len(my_data) >= machine_history_size:
-                m_t = calc_t(other_data, my_data, linear_weights)
+            if machine_threshold is None:
+                good_data.append(di)
             else:
-                m_t = 0
-
-            if abs(m_t) >= machine_threshold:
+                my_history = self.machine_history[di.machine_id]
+                my_history_index = my_history.index(di)
+                my_data = [d.value for d in self.machine_history[di.machine_id][my_history_index-machine_history_size+1:my_history_index+1]]
+                other_data = []
                 l = len(good_data)-1
-                while l >= 0:
+                while len(other_data) < k*2 and l > 0:
                     dl = good_data[l]
                     if dl.machine_id != di.machine_id:
-                        di.last_other = dl
-                        break
+                        other_data.insert(0, dl.value)
                     l -= 1
-                # We think this machine is bad, so don't add its data to the
-                # set of good data
-                di.state = 'machine'
-            else:
-                good_data.append(di)
+
+                if len(other_data) >= k*2 and len(my_data) >= machine_history_size:
+                    m_t = calc_t(other_data, my_data, linear_weights)
+                else:
+                    m_t = 0
+
+                if abs(m_t) >= machine_threshold:
+                    l = len(good_data)-1
+                    while l >= 0:
+                        dl = good_data[l]
+                        if dl.machine_id != di.machine_id:
+                            di.last_other = dl
+                            break
+                        l -= 1
+                    # We think this machine is bad, so don't add its data to the
+                    # set of good data
+                    di.state = 'machine'
+                else:
+                    good_data.append(di)
 
         # Now that the t-test scores are calculated, go back through the data to
         # find where regressions most likely happened.
         for i in range(1, len(good_data) - 1):
             di = good_data[i]
-            if di.t <= threshold:
+            if di.t <= t_threshold:
                 continue
 
             # Check the adjacent points
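
For reference, a minimal sketch of how the reworked keyword-argument API is
exercised once the patch is applied, mirroring the updated test_analyze_t
above. The step-shaped sample series is made up, and the import assumes a
treeherder checkout is on PYTHONPATH; with machine_threshold left at its
default of None, the per-machine check in the last hunk is skipped entirely.

    from treeherder.perfalert.perfalert import PerfDatum, TalosAnalyzer

    # Synthetic series: sixteen points that step from 0.0 up to 1.0 at t=8,
    # the same shape as the data used in test_analyze_t.
    data = [PerfDatum(t, float(t >= 8)) for t in range(16)]

    a = TalosAnalyzer()
    a.addData(data)

    # Machine history is now optional: omitting machine_threshold and
    # machine_history_size keeps every datum in the "good" pool, so only the
    # back-window/fore-window t-test is applied.
    for d in a.analyze_t(back_window=5, fore_window=5, t_threshold=2):
        print("%s %s %s" % (d.push_timestamp, d.state, d.t))

Passing machine_threshold and machine_history_size re-enables the per-machine
comparison shown in the last hunk, which flags suspect machines by setting
d.state to 'machine' instead of adding their data to the good set.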