Mirror of https://github.com/mozilla/treeherder.git
Bug 858756 (2/2) - Blame the changeset whose t score is a local maximum [r=catlee]
When several changesets in a row are potential causes of a regression, blame the one with the highest t-test score rather than the first one.
This commit is contained in:
Parent
b2989cb8e6
Commit
da3dc7de29
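The rule this patch introduces, in isolation: once a t score has been computed for every data point, a point is only blamed for a regression if its score exceeds the threshold and neither neighbour scores higher, so a run of consecutive suspect changesets collapses to the single local maximum. A minimal standalone sketch of that selection step (Point and find_regressions are illustrative names for this example, not part of the patched module):

    class Point(object):
        # Hypothetical stand-in for PerfDatum, carrying only what the sketch needs.
        def __init__(self, revision, t):
            self.revision = revision
            self.t = t
            self.state = 'good'

    def find_regressions(points, threshold):
        # Skip the first and last points: each has only one neighbour, so no
        # final decision can be made for it (mirroring the patch's behaviour).
        for i in range(1, len(points) - 1):
            p = points[i]
            if p.t <= threshold:
                continue
            # Blame only a local maximum of the t score: skip the point if
            # either neighbour has a strictly higher score.
            if points[i - 1].t > p.t or points[i + 1].t > p.t:
                continue
            p.state = 'regression'
        return [p.revision for p in points if p.state == 'regression']

    # Three consecutive changesets exceed the threshold, but only the one with
    # the highest t score (r3) gets blamed.
    data = [Point('r1', 0.5), Point('r2', 9.0), Point('r3', 12.0),
            Point('r4', 8.5), Point('r5', 0.7)]
    print(find_regressions(data, threshold=2))   # ['r3']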
@@ -26,9 +26,9 @@ def calc_t(w1, w2):
 class PerfDatum(object):
     __slots__ = ('testrun_id', 'machine_id', 'timestamp', 'value', 'buildid',
             'time', 'revision', 'run_number', 'last_other', 'historical_stats',
-            'forward_stats')
+            'forward_stats', 't', 'state')
     def __init__(self, testrun_id, machine_id, timestamp, value, buildid, time,
-            revision=None):
+            revision=None, state='good'):
         # Which test run was this
         self.testrun_id = testrun_id
         # Which machine is this
@@ -43,6 +43,10 @@ class PerfDatum(object):
         self.time = time
         # What revision this data is for
         self.revision = revision
+        # t-test score
+        self.t = 0
+        # Whether a machine issue or perf regression is found
+        self.state = state

     def __cmp__(self, o):
         return cmp(
@@ -86,7 +90,8 @@ class TalosAnalyzer:
         # Analyze test data using T-Tests, comparing data[i-j:i] to data[i:i+k]
         good_data = []

-        for i in range(len(self.data)-k+1):
+        num_points = len(self.data) - k + 1
+        for i in range(num_points):
             di = self.data[i]
             jw = [d.value for d in good_data[-j:]]
             kw = [d.value for d in self.data[i:i+k]]
@@ -106,10 +111,10 @@ class TalosAnalyzer:
             di.forward_stats = analyze(kw)

             if len(jw) >= j:
-                t = calc_t(jw, kw)
+                di.t = abs(calc_t(jw, kw))
             else:
                 # Assume it's ok, we don't have enough data
-                t = 0
+                di.t = 0

             if len(other_data) >= k*2 and len(my_data) >= machine_history_size:
                 m_t = calc_t(other_data, my_data)
@@ -126,12 +131,30 @@ class TalosAnalyzer:
                     l -= 1
                 # We think this machine is bad, so don't add its data to the
                 # set of good data
-                yield di, "machine"
-            elif abs(t) <= threshold:
-                good_data.append(di)
-                yield di, "good"
+                di.state = 'machine'
             else:
                 # By including the data point as part of the "good" data, we slowly
                 # adjust to the new baseline.
                 good_data.append(di)
-                yield di, "regression"
+
+        # Now that the t-test scores are calculated, go back through the data to
+        # find where regressions most likely happened.
+        for i in range(1, len(good_data) - 1):
+            di = good_data[i]
+            if di.t <= threshold:
+                continue
+
+            # Check the adjacent points
+            prev = good_data[i-1]
+            if prev.t > di.t:
+                continue
+            next = good_data[i+1]
+            if next.t > di.t:
+                continue
+
+            # This datapoint has a t value higher than the threshold and higher
+            # than either neighbor. Mark it as the cause of a regression.
+            di.state = 'regression'
+
+        # Return all but the first and last points whose scores we calculated,
+        # since we can only produce a final decision for a point whose scores
+        # were compared to both of its neighbors.
+        return self.data[1:num_points-1]
@@ -677,14 +677,14 @@ class AnalysisRunner:
         basename = "%s/%s-%s-%s" % (graph_dir,
                 series.branch_name, series.os_name, test_name)

-        for d, state, skip, last_good in series_data:
+        for d, skip, last_good in series_data:
             graph_point = (d.time * 1000, d.value)
             all_data.append(graph_point)
-            if state == "good":
+            if d.state == "good":
                 good_data.append(graph_point)
-            elif state == "regression":
+            elif d.state == "regression":
                 regressions.append(graph_point)
-            elif state == "machine":
+            elif d.state == "machine":
                 bad_machines.setdefault(d.machine_id, []).append(graph_point)

         log.debug("Creating graph %s", basename)
@@ -828,33 +828,33 @@ class AnalysisRunner:
         warnings = self.warning_history[s.branch_name][s.os_name][s.test_name]

         series_data = self.processSeries(analysis_gen, warnings)
-        for d, state, skip, last_good in series_data:
-            self.handleData(s, d, state, skip, last_good)
+        for d, skip, last_good in series_data:
+            self.handleData(s, d, d.state, skip, last_good)

         if self.config.has_option('main', 'graph_dir'):
             self.outputGraphs(s, series_data)

     def processSeries(self, analysis_gen, warnings):
         last_good = None
         last_err = None
         last_err_good = None
         # Uncomment this for debugging!
         #cutoff = self.options.start_time
         cutoff = time.time() - 7*24*3600
         series_data = []
-        for d, state in analysis_gen:
+        for d in analysis_gen:
             skip = False
             if d.timestamp < cutoff:
                 continue

-            if state != "good":
+            if d.state == "good":
+                last_good = d
+            else:
                 # Skip warnings about regressions we've already
                 # warned people about
                 if (d.buildid, d.timestamp) in warnings:
                     skip = True
                 else:
                     warnings.append((d.buildid, d.timestamp))
-                if state == "machine":
+                if d.state == "machine":
                     machine_name = self.source.getMachineName(d.machine_id)
                     if 'bad_machines' not in self.warning_history:
                         self.warning_history['bad_machines'] = {}
@@ -865,17 +865,7 @@ class AnalysisRunner:
                         # If it was over a week ago, then send another warning
                         self.warning_history['bad_machines'][machine_name] = time.time()

-                if not last_err:
-                    last_err = d
-                    last_err_good = last_good
-                elif last_err_good == last_good:
-                    skip = True
-
-            else:
-                last_err = None
-                last_good = d
-
-            series_data.append((d, state, skip, last_good))
+            series_data.append((d, skip, last_good))

         return series_data

@@ -30,20 +30,18 @@ class TestTalosAnalyzer(unittest.TestCase):
         data = self.get_data()
         a.addData(data)

-        result = [(d.time, state) for d, state in a.analyze_t(5, 5, 2, 15, 5)]
+        result = [(d.time, d.state) for d in a.analyze_t(5, 5, 2, 15, 5)]
         self.assertEqual(result, [
             (0, 'good'),
             (1, 'good'),
             (2, 'good'),
             (3, 'good'),
             (4, 'good'),
             (5, 'good'),
-            (6, 'regression'),
-            (7, 'regression'),
+            (6, 'good'),
+            (7, 'good'),
             (8, 'regression'),
-            (9, 'regression'),
-            (10, 'regression'),
-            (11, 'good')])
+            (9, 'good'),
+            (10, 'good')])

 if __name__ == '__main__':
     unittest.main()
@@ -26,14 +26,14 @@ class TestAnalysisRunner(unittest.TestCase):

     def get_data(self):
         return [
-            (PerfDatum(0, 0, time() + 0, 0.0, 0, 0), 'good'),
-            (PerfDatum(1, 1, time() + 1, 0.0, 1, 1), 'good'),
-            (PerfDatum(2, 2, time() + 2, 0.0, 2, 2), 'good'),
-            (PerfDatum(3, 3, time() + 3, 0.0, 3, 3), 'good'),
-            (PerfDatum(4, 4, time() + 4, 1.0, 4, 4), 'regression'),
-            (PerfDatum(5, 5, time() + 5, 1.0, 5, 5), 'good'),
-            (PerfDatum(6, 6, time() + 6, 1.0, 6, 6), 'good'),
-            (PerfDatum(7, 7, time() + 7, 1.0, 7, 7), 'good'),
+            PerfDatum(0, 0, time() + 0, 0.0, 0, 0, state='good'),
+            PerfDatum(1, 1, time() + 1, 0.0, 1, 1, state='good'),
+            PerfDatum(2, 2, time() + 2, 0.0, 2, 2, state='good'),
+            PerfDatum(3, 3, time() + 3, 0.0, 3, 3, state='good'),
+            PerfDatum(4, 4, time() + 4, 1.0, 4, 4, state='regression'),
+            PerfDatum(5, 5, time() + 5, 1.0, 5, 5, state='good'),
+            PerfDatum(6, 6, time() + 6, 1.0, 6, 6, state='good'),
+            PerfDatum(7, 7, time() + 7, 1.0, 7, 7, state='good'),
         ]

     def test_processSeries(self):
@@ -44,12 +44,12 @@ class TestAnalysisRunner(unittest.TestCase):
         results = runner.processSeries(data, [])
         self.assertEqual(len(results), 8)

-        skipped = filter(lambda (d, state, skip, last_good): skip, results)
+        skipped = filter(lambda (d, skip, last_good): skip, results)
         self.assertEqual(len(skipped), 0)

-        self.assertEqual(results[3], (data[3][0], 'good', False, data[3][0]))
-        self.assertEqual(results[4], (data[4][0], 'regression', False, data[3][0]))
-        self.assertEqual(results[5], (data[5][0], 'good', False, data[5][0]))
+        self.assertEqual(results[3], (data[3], False, data[3]))
+        self.assertEqual(results[4], (data[4], False, data[3]))
+        self.assertEqual(results[5], (data[5], False, data[5]))


 if __name__ == '__main__':
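For callers, the return shape changes as well: analyze_t now hands back PerfDatum objects whose state and t attributes replace the old (datum, state) tuples, as the updated tests show. A rough sketch of the new consumption pattern, assuming the patched analyzer module is importable (its file name is not shown in this diff, so the import below is a commented placeholder):

    # Placeholder import; substitute the real module that defines TalosAnalyzer.
    # from analyze import TalosAnalyzer, PerfDatum

    def report(analyzer):
        # Same arguments as the updated test: j, k, threshold,
        # machine_threshold, machine_history_size.
        for d in analyzer.analyze_t(5, 5, 2, 15, 5):
            if d.state == 'regression':
                print("regression blamed on revision %s (t=%.2f)" % (d.revision, d.t))
            elif d.state == 'machine':
                print("machine %s looks suspect" % d.machine_id)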