From d8623118fdb3d2740bcc7c4ef70c37628b51a650 Mon Sep 17 00:00:00 2001
From: Ian Bicking <ianb@colorstudy.com>
Date: Tue, 17 Aug 2010 16:17:00 -0500
Subject: [PATCH 1/3] PEP 8 and pyflakes cleanups

--HG--
branch : 1.0
---
 treeherder/perfalert/perfalert/analyze.py     | 30 ++++----
 treeherder/perfalert/perfalert/analyze_db.py  | 14 +++-
 .../perfalert/perfalert/analyze_graphapi.py   |  5 +-
 .../perfalert/perfalert/analyze_talos.py      | 74 ++++++++++---------
 4 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/treeherder/perfalert/perfalert/analyze.py b/treeherder/perfalert/perfalert/analyze.py
index c7afb0e1a..a94b4fcff 100644
--- a/treeherder/perfalert/perfalert/analyze.py
+++ b/treeherder/perfalert/perfalert/analyze.py
@@ -1,5 +1,3 @@
-import csv, datetime, time, os
-
 def analyze(data):
     s = sum(data)
     n = len(data)
@@ -8,6 +6,7 @@ def analyze(data):
     stddev = variance ** 0.5
     return {"sum": s, "avg": avg, "n": n, "stddev": stddev, "variance": variance}
 
+
 def calc_t(w1, w2):
     if len(w1) == 0 or len(w2) == 0:
         return 0
@@ -20,6 +19,7 @@ def calc_t(w1, w2):
 
     return (s2['avg'] - s1['avg']) / (((s1['variance'] / s1['n']) + (s2['variance'] / s2['n'])) ** 0.5)
 
+
 class PerfDatum:
     def __init__(self, machine_id, timestamp, value, buildid, time, revision=None):
         # Which machine is this
@@ -38,7 +38,7 @@ class PerfDatum:
     def __cmp__(self, o):
         return cmp(
                 (self.time, self.timestamp),
-                (o.time, o.timestamp)
+                (o.time, o.timestamp),
                 )
 
     def __eq__(self, o):
@@ -56,6 +56,7 @@ class PerfDatum:
     def __str__(self):
         return "Build %s on %s %s %s %s" % (self.buildid, self.timestamp, self.time, self.value, self.machine_id)
 
+
 class TalosAnalyzer:
     def __init__(self):
         # List of PerfDatum instances
@@ -86,12 +87,12 @@ class TalosAnalyzer:
             if (j, window, threshold) in self.zenPoints:
                 return self.zenPoints[(j, window, threshold)]
 
-            data = [d.value for d in self.data[j-window:j]]
+            data = [d.value for d in self.data[j - window:j]]
             stats = analyze(data)
             stddev = stats['stddev']
             avg = stats['avg']
             thresh = stddev * threshold
-            if any( (abs(d-avg) > thresh) for d in data ):
+            if any((abs(d - avg) > thresh) for d in data):
                 j -= 1
             else:
                 break
@@ -102,17 +103,18 @@ class TalosAnalyzer:
         # Use T-Tests
         # Analyze test data using T-Tests, comparing data[i-j:i] to data[i:i+k]
         good_data = []
-        for i in range(j, len(self.data)-k+1):
+        for i in range(j, len(self.data) - k + 1):
             di = self.data[i]
             jw = [d.value for d in good_data[-j:]]
-            kw = [d.value for d in self.data[i:i+k]]
+            kw = [d.value for d in self.data[i:i + k]]
 
             my_history = self.machine_history[di.machine_id]
             my_history_index = my_history.index(di)
-            my_data = [d.value for d in self.machine_history[di.machine_id][my_history_index-machine_history_size+1:my_history_index+1]]
+            my_data = [d.value for d in
+                       self.machine_history[di.machine_id][my_history_index - machine_history_size + 1:my_history_index + 1]]
             other_data = []
-            l = len(good_data)-1
-            while len(other_data) < k*2 and l > 0:
+            l = len(good_data) - 1
+            while len(other_data) < k * 2 and l > 0:
                 dl = good_data[l]
                 if dl.machine_id != di.machine_id:
                     other_data.insert(0, dl.value)
@@ -120,13 +122,13 @@ class TalosAnalyzer:
 
             t = calc_t(jw, kw)
 
-            if len(other_data) >= k*2 and len(my_data) >= machine_history_size:
+            if len(other_data) >= k * 2 and len(my_data) >= machine_history_size:
                 m_t = calc_t(other_data, my_data)
             else:
                 m_t = 0
 
             if abs(m_t) >= machine_threshold:
-                l = len(good_data)-1
+                l = len(good_data) - 1
                 while l >= 0:
                     dl = good_data[l]
                     if dl.machine_id != di.machine_id:
@@ -162,10 +164,10 @@ class TalosAnalyzer:
         bad_machine_threshold = 3
 
         machine_history = {}
-        for i in range(window+1, len(self.data)):
+        for i in range(window + 1, len(self.data)):
             di = self.data[i]
             j = self.findZen(i, window, threshold)
-            data = [d.value for d in self.data[j-window:j]]
+            data = [d.value for d in self.data[j - window:j]]
             stats = analyze(data)
             avg = stats['avg']
             stddev = stats['stddev']
diff --git a/treeherder/perfalert/perfalert/analyze_db.py b/treeherder/perfalert/perfalert/analyze_db.py
index 28452764f..e9c99c7f3 100644
--- a/treeherder/perfalert/perfalert/analyze_db.py
+++ b/treeherder/perfalert/perfalert/analyze_db.py
@@ -1,4 +1,3 @@
-import sys
 import sqlalchemy as sa
 from sqlalchemy.ext.sqlsoup import SqlSoup
 
@@ -6,6 +5,9 @@ from analyze import PerfDatum
 from analyze_graphapi import TestSeries
 
 db = None
+goodNameClause = None
+
+
 def connect(url):
     global db
     db = SqlSoup(url)
@@ -13,6 +15,7 @@ def connect(url):
     global goodNameClause
     goodNameClause = db.machines.is_active == 1
 
+
 def getTestData(series, start_time):
     q = sa.select(
         [db.test_runs.machine_id, db.builds.ref_build_id, db.test_runs.date_run, db.test_runs.average, db.builds.ref_changeset, db.test_runs.run_number, db.builds.branch_id],
@@ -37,6 +40,7 @@ def getTestData(series, start_time):
         data.append(d)
     return data
 
+
 def getTestSeries(branches, start_date, test_names):
     # Find all the Branch/OS/Test combinations
     if len(test_names) > 0:
@@ -56,7 +60,7 @@ def getTestSeries(branches, start_date, test_names):
                 sa.not_(db.machines.name.like('%stage%')),
                 sa.not_(db.tests.pretty_name.like("%NoChrome%")),
                 sa.not_(db.tests.pretty_name.like("%Fast Cycle%")),
-                test_clause
+                test_clause,
             ))
 
     q = q.distinct()
@@ -66,7 +70,10 @@ def getTestSeries(branches, start_date, test_names):
         retval.append(TestSeries(*row))
     return retval
 
+
 _machines_cache = {}
+
+
 def getMachinesForTest(series):
     key = (series.os_id, series.branch_id, series.test_id)
     if key in _machines_cache:
@@ -87,7 +94,10 @@ def getMachinesForTest(series):
     _machines_cache[key] = [row[0] for row in result.fetchall()]
     return _machines_cache[key]
 
+
 _name_cache = {}
+
+
 def getMachineName(machine_id):
     if machine_id in _name_cache:
         return _name_cache[machine_id]
diff --git a/treeherder/perfalert/perfalert/analyze_graphapi.py b/treeherder/perfalert/perfalert/analyze_graphapi.py
index 43877bb86..f2fc90564 100644
--- a/treeherder/perfalert/perfalert/analyze_graphapi.py
+++ b/treeherder/perfalert/perfalert/analyze_graphapi.py
@@ -4,10 +4,10 @@ try:
 except ImportError:
     import json
 
-import urllib, os, sys
-
+import urllib
 from analyze import PerfDatum
 
+
 class TestSeries:
     def __init__(self, branch_id, branch_name, os_id, os_name, test_id, test_name):
         self.branch_id = branch_id
@@ -23,6 +23,7 @@ class TestSeries:
     def __hash__(self):
         return hash((self.branch_id, self.os_id, self.test_id))
 
+
 class GraphAPISource:
     def __init__(self, baseurl):
         self.baseurl = baseurl
diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py
index 63d0a1053..bf37d0960 100644
--- a/treeherder/perfalert/perfalert/analyze_talos.py
+++ b/treeherder/perfalert/perfalert/analyze_talos.py
@@ -1,5 +1,8 @@
 from __future__ import with_statement
-import time, urllib, re, os
+import time
+import urllib
+import re
+import os
 import logging as log
 import email.utils
 import threading
@@ -12,9 +15,8 @@ except ImportError:
 from smtplib import SMTP
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
-import urllib
+from analyze import TalosAnalyzer
 
-from analyze import TalosAnalyzer, PerfDatum
 
 def shorten(url, login, apiKey, max_tries=10, sleep_time=30):
     params = {
@@ -42,6 +44,7 @@ def shorten(url, login, apiKey, max_tries=10, sleep_time=30):
         else:
             raise ValueError("Unknown error: %s" % data)
 
+
 def safe_shorten(url, login, apiKey):
     try:
         return shorten(url, login, apiKey)
@@ -49,9 +52,11 @@ def safe_shorten(url, login, apiKey):
         log.exception("Unable to shorten url %s", url)
         return url
 
+
 def avg(l):
     return sum(l) / float(len(l))
 
+
 def send_msg(fromaddr, subject, msg, addrs, html=None, headers={}):
     s = SMTP()
     s.connect()
@@ -72,6 +77,7 @@ def send_msg(fromaddr, subject, msg, addrs, html=None, headers={}):
         s.sendmail(fromaddr, [addr], m.as_string())
     s.quit()
 
+
 class PushDater:
     def __init__(self, filename, base_url):
         self.filename = filename
@@ -107,7 +113,7 @@ class PushDater:
         if len(to_query) > 0:
             log.debug("Fetching %i changesets", len(to_query))
             for i in range(0, len(to_query), 50):
-                chunk = to_query[i:i+50]
+                chunk = to_query[i:i + 50]
                 changesets = ["changeset=%s" % c for c in chunk]
                 base_url = self.base_url
                 url = "%s/%s/json-pushes?%s" % (base_url, repo_path, "&".join(changesets))
@@ -127,6 +133,7 @@ class PushDater:
                                 retval[changeset[:12]] = entry['date']
         return retval
 
+
 class AnalysisRunner:
     def __init__(self, options, config):
         self.options = options
@@ -233,12 +240,12 @@ class AnalysisRunner:
         for machine_id in machine_ids:
             test_params.append((series.test_id, series.branch_id, machine_id))
 
-        test_params = json.dumps(test_params, separators=(",",":"))
+        test_params = json.dumps(test_params, separators=(",", ":"))
         #test_params = urllib.quote(test_params)
         base_url = self.config.get('main', 'base_graph_url')
         if d is not None:
-            start_time = d.timestamp - 24*3600
-            end_time = d.timestamp + 24*3600
+            start_time = d.timestamp - 24 * 3600
+            end_time = d.timestamp + 24 * 3600
             return "%(base_url)s/graph.html#tests=%(test_params)s&sel=%(start_time)s,%(end_time)s" % locals()
         else:
             return "%(base_url)s/graph.html#tests=%(test_params)s" % locals()
@@ -317,7 +324,7 @@ class AnalysisRunner:
         if state == "machine":
             reason = "Suspected machine issue (%s)" % bad_machine_name
             if not html:
-                msg =  """\
+                msg = """\
 %(reason)s: %(test_name)s %(direction)s %(change).3g%% on %(os_name)s %(branch_name)s
     Previous results:
         %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s
@@ -328,7 +335,7 @@ class AnalysisRunner:
             else:
                 chart_url_encoded = xml.sax.saxutils.quoteattr(chart_url)
                 hg_url_encoded = xml.sax.saxutils.quoteattr(hg_url)
-                msg =  """\
+                msg = """\
 <p>%(reason)s: %(test_name)s <a href=%(chart_url_encoded)s>%(direction)s %(change).3g%%</a> on %(os_name)s %(branch_name)s</p>
 <p>Previous results: %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s</p>
 <p>New results: %(new_value)s from build %(bad_build_id)s of %(bad_rev)s at %(bad_build_time)s on %(bad_machine_name)s</p>
@@ -337,7 +344,7 @@ class AnalysisRunner:
 """ % locals()
         else:
             if not html:
-                msg =  """\
+                msg = """\
 %(reason)s: %(test_name)s %(direction)s %(change).3g%% on %(os_name)s %(branch_name)s
     Previous results:
         %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s run # %(good_run_number)s
@@ -431,7 +438,7 @@ class AnalysisRunner:
                 continue
 
             if s.branch_name not in warnings:
-               warnings[s.branch_name] = {}
+                warnings[s.branch_name] = {}
             if s.os_name not in warnings[s.branch_name]:
                 warnings[s.branch_name][s.os_name] = {}
             if s.test_name not in warnings[s.branch_name][s.os_name]:
@@ -474,7 +481,7 @@ class AnalysisRunner:
         now = time.asctime()
         fp.write("// Generated at %s\n" % now)
         fp.write("gFetchTime = ")
-        json.dump(now, fp, separators=(',',':'))
+        json.dump(now, fp, separators=(',', ':'))
         fp.write(";\n")
         fp.write("var gData = ")
         # Hackity hack
@@ -484,7 +491,7 @@ class AnalysisRunner:
             json.encoder.FLOAT_REPR = lambda f: "%.8g" % f
         except:
             pass
-        json.dump(self.dashboard_data, fp, separators=(',',':'), sort_keys=True)
+        json.dump(self.dashboard_data, fp, separators=(',', ':'), sort_keys=True)
         try:
             json.encoder.FLOAT_REPR = repr
         except:
@@ -535,8 +542,8 @@ class AnalysisRunner:
 
         title = "Talos Regression Graph for %(test_name)s on %(os_name)s %(branch_name)s" % locals()
 
-        html = html_template % dict(graph_file = os.path.basename(graph_file),
-                title=title)
+        html = html_template % dict(graph_file=os.path.basename(graph_file),
+                                    title=title)
         if not os.path.exists(graph_dir):
             os.makedirs(graph_dir)
             # Copy in the rest of the HTML as well
@@ -558,7 +565,7 @@ class AnalysisRunner:
             self.warning_history['inactive_machines'] = {}
 
         # Complain about anything that hasn't reported in 48 hours
-        cutoff = time.time() - 48*3600
+        cutoff = time.time() - 48 * 3600
 
         addresses = []
         if self.config.has_option('main', 'machine_emails'):
@@ -569,7 +576,7 @@ class AnalysisRunner:
                 machine_name = self.source.getMachineName(machine_id)
 
                 # When did we last warn about this machine?
-                if self.warning_history['inactive_machines'].get(machine_name, 0) < time.time() - 7*24*3600:
+                if self.warning_history['inactive_machines'].get(machine_name, 0) < time.time() - 7 * 24 * 3600:
                     # If it was over a week ago, then send another warning
                     self.warning_history['inactive_machines'][machine_name] = time.time()
 
@@ -614,7 +621,7 @@ class AnalysisRunner:
         data = self.source.getTestData(s, options.start_time)
 
         # Add it to our dashboard data
-        sevenDaysAgo = time.time() - 7*24*60*60
+        sevenDaysAgo = time.time() - 7 * 24 * 60 * 60
         importantTests = []
         for t in re.split(r"(?<!\\),", self.config.get("dashboard", "tests")):
             t = t.replace("\\,", ",").strip()
@@ -651,7 +658,7 @@ class AnalysisRunner:
                 if machine_name.startswith("_"):
                     continue
                 results = _d[machine_name]['results']
-                values = [results[i+1] for i in range(0, len(results), 2)]
+                values = [results[i + 1] for i in range(0, len(results), 2)]
                 _d[machine_name]['stats'] = [avg(values), max(values), min(values)]
 
         self.updateTimes(s.branch_name, data)
@@ -676,7 +683,7 @@ class AnalysisRunner:
         last_err = None
         last_err_good = None
         #cutoff = self.options.start_time
-        cutoff = time.time() - 7*24*3600
+        cutoff = time.time() - 7 * 24 * 3600
         series_data = []
         for d, state in analysis_gen:
             skip = False
@@ -696,7 +703,7 @@ class AnalysisRunner:
                             if 'bad_machines' not in self.warning_history:
                                 self.warning_history['bad_machines'] = {}
                             # When did we last warn about this machine?
-                            if self.warning_history['bad_machines'].get(machine_name, 0) > time.time() - 7*24*3600:
+                            if self.warning_history['bad_machines'].get(machine_name, 0) > time.time() - 7 * 24 * 3600:
                                 skip = True
                             else:
                                 # If it was over a week ago, then send another warning
@@ -725,6 +732,7 @@ class AnalysisRunner:
         log.info("Fetching list of tests")
         series = self.source.getTestSeries(self.options.branches, self.options.start_time, self.options.tests)
         self.done = False
+
         def runner():
             while not self.done:
                 try:
@@ -793,16 +801,16 @@ if __name__ == "__main__":
     parser.add_option("", "--catchup", dest="catchup", action="store_true", help="Don't output any warnings, just process data")
 
     parser.set_defaults(
-            branches = [],
-            tests = [],
-            start_time = time.time() - 30*24*3600,
-            verbosity = log.INFO,
-            output = None,
-            json = None,
-            addresses = [],
-            machine_addresses = [],
-            config = "analysis.cfg",
-            catchup = False,
+            branches=[],
+            tests=[],
+            start_time=time.time() - 30 * 24 * 3600,
+            verbosity=log.INFO,
+            output=None,
+            json=None,
+            addresses=[],
+            machine_addresses=[],
+            config="analysis.cfg",
+            catchup=False,
             )
 
     options, args = parser.parse_args()
@@ -814,9 +822,9 @@ if __name__ == "__main__":
     config.read([options.config])
 
     if options.addresses:
-        config.set('main', 'regression_emails', ",".join(option.addresses))
+        config.set('main', 'regression_emails', ",".join(options.addresses))
     if options.machine_addresses:
-        config.set('main', 'machine_emails', ",".join(option.machine_addresses))
+        config.set('main', 'machine_emails', ",".join(options.machine_addresses))
 
     runner = AnalysisRunner(options, config)
     try:

From 6e98f20fabca3a6f14fd31d246d5cdaec77043ba Mon Sep 17 00:00:00 2001
From: Ian Bicking <ianb@colorstudy.com>
Date: Fri, 17 Sep 2010 14:36:12 -0400
Subject: [PATCH 2/3] Add environment variable substitution to the config
 files

--HG--
branch : 1.0
---
 treeherder/perfalert/perfalert/analyze_talos.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py
index bf37d0960..a1a32d551 100644
--- a/treeherder/perfalert/perfalert/analyze_talos.py
+++ b/treeherder/perfalert/perfalert/analyze_talos.py
@@ -16,6 +16,7 @@ from smtplib import SMTP
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from analyze import TalosAnalyzer
+from string import Template
 
 
 def shorten(url, login, apiKey, max_tries=10, sleep_time=30):
@@ -767,7 +768,7 @@ class AnalysisRunner:
                 except KeyboardInterrupt:
                     print "Exiting..."
                     self.done = True
-                        
+
             for t in threads:
                 t.join()
         else:
@@ -826,6 +827,16 @@ if __name__ == "__main__":
     if options.machine_addresses:
         config.set('main', 'machine_emails', ",".join(options.machine_addresses))
 
+    vars = os.environ.copy()
+    vars['sys_prefix'] = sys.prefix
+    vars['here'] = os.path.dirname(__file__)
+    for section in config.sections():
+        for option in config.options(section):
+            value = config.get(section, option)
+            if '$' in value:
+                value = Template(value).substitute(vars)
+                config.set(section, option, value)
+
     runner = AnalysisRunner(options, config)
     try:
         runner.run()

From 3c3b2e19c00cca67c4377875b2123f4a8f6aa729 Mon Sep 17 00:00:00 2001
From: Ian Bicking <ianb@colorstudy.com>
Date: Fri, 17 Sep 2010 15:27:20 -0400
Subject: [PATCH 3/3] Fix undefined-name typo s/option/options/

--HG--
branch : 1.0
---
 treeherder/perfalert/perfalert/analyze_talos.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py
index 6fb22dc75..2bdf6deec 100644
--- a/treeherder/perfalert/perfalert/analyze_talos.py
+++ b/treeherder/perfalert/perfalert/analyze_talos.py
@@ -1059,9 +1059,9 @@ if __name__ == "__main__":
     config.read([options.config])
 
     if options.addresses:
-        config.set('main', 'regression_emails', ",".join(option.addresses))
+        config.set('main', 'regression_emails', ",".join(options.addresses))
     if options.machine_addresses:
-        config.set('main', 'machine_emails', ",".join(option.machine_addresses))
+        config.set('main', 'machine_emails', ",".join(options.machine_addresses))
 
     vars = os.environ.copy()
     vars['sys_prefix'] = sys.prefix