From d8623118fdb3d2740bcc7c4ef70c37628b51a650 Mon Sep 17 00:00:00 2001 From: Ian Bicking Date: Tue, 17 Aug 2010 16:17:00 -0500 Subject: [PATCH 1/3] Pep8 and pyflakes cleanups --HG-- branch : 1.0 --- treeherder/perfalert/perfalert/analyze.py | 30 ++++---- treeherder/perfalert/perfalert/analyze_db.py | 14 +++- .../perfalert/perfalert/analyze_graphapi.py | 5 +- .../perfalert/perfalert/analyze_talos.py | 74 ++++++++++--------- 4 files changed, 72 insertions(+), 51 deletions(-) diff --git a/treeherder/perfalert/perfalert/analyze.py b/treeherder/perfalert/perfalert/analyze.py index c7afb0e1a..a94b4fcff 100644 --- a/treeherder/perfalert/perfalert/analyze.py +++ b/treeherder/perfalert/perfalert/analyze.py @@ -1,5 +1,3 @@ -import csv, datetime, time, os - def analyze(data): s = sum(data) n = len(data) @@ -8,6 +6,7 @@ def analyze(data): stddev = variance ** 0.5 return {"sum": s, "avg": avg, "n": n, "stddev": stddev, "variance": variance} + def calc_t(w1, w2): if len(w1) == 0 or len(w2) == 0: return 0 @@ -20,6 +19,7 @@ def calc_t(w1, w2): return (s2['avg'] - s1['avg']) / (((s1['variance'] / s1['n']) + (s2['variance'] / s2['n'])) ** 0.5) + class PerfDatum: def __init__(self, machine_id, timestamp, value, buildid, time, revision=None): # Which machine is this @@ -38,7 +38,7 @@ class PerfDatum: def __cmp__(self, o): return cmp( (self.time, self.timestamp), - (o.time, o.timestamp) + (o.time, o.timestamp), ) def __eq__(self, o): @@ -56,6 +56,7 @@ class PerfDatum: def __str__(self): return "Build %s on %s %s %s %s" % (self.buildid, self.timestamp, self.time, self.value, self.machine_id) + class TalosAnalyzer: def __init__(self): # List of PerfDatum instances @@ -86,12 +87,12 @@ class TalosAnalyzer: if (j, window, threshold) in self.zenPoints: return self.zenPoints[(j, window, threshold)] - data = [d.value for d in self.data[j-window:j]] + data = [d.value for d in self.data[j - window:j]] stats = analyze(data) stddev = stats['stddev'] avg = stats['avg'] thresh = stddev * threshold - if any( (abs(d-avg) > thresh) for d in data ): + if any((abs(d - avg) > thresh) for d in data): j -= 1 else: break @@ -102,17 +103,18 @@ class TalosAnalyzer: # Use T-Tests # Analyze test data using T-Tests, comparing data[i-j:i] to data[i:i+k] good_data = [] - for i in range(j, len(self.data)-k+1): + for i in range(j, len(self.data) - k + 1): di = self.data[i] jw = [d.value for d in good_data[-j:]] - kw = [d.value for d in self.data[i:i+k]] + kw = [d.value for d in self.data[i:i + k]] my_history = self.machine_history[di.machine_id] my_history_index = my_history.index(di) - my_data = [d.value for d in self.machine_history[di.machine_id][my_history_index-machine_history_size+1:my_history_index+1]] + my_data = [d.value for d in + self.machine_history[di.machine_id][my_history_index - machine_history_size + 1:my_history_index + 1]] other_data = [] - l = len(good_data)-1 - while len(other_data) < k*2 and l > 0: + l = len(good_data) - 1 + while len(other_data) < k * 2 and l > 0: dl = good_data[l] if dl.machine_id != di.machine_id: other_data.insert(0, dl.value) @@ -120,13 +122,13 @@ class TalosAnalyzer: t = calc_t(jw, kw) - if len(other_data) >= k*2 and len(my_data) >= machine_history_size: + if len(other_data) >= k * 2 and len(my_data) >= machine_history_size: m_t = calc_t(other_data, my_data) else: m_t = 0 if abs(m_t) >= machine_threshold: - l = len(good_data)-1 + l = len(good_data) - 1 while l >= 0: dl = good_data[l] if dl.machine_id != di.machine_id: @@ -162,10 +164,10 @@ class TalosAnalyzer: bad_machine_threshold = 3 machine_history = {} - for i in range(window+1, len(self.data)): + for i in range(window + 1, len(self.data)): di = self.data[i] j = self.findZen(i, window, threshold) - data = [d.value for d in self.data[j-window:j]] + data = [d.value for d in self.data[j - window:j]] stats = analyze(data) avg = stats['avg'] stddev = stats['stddev'] diff --git a/treeherder/perfalert/perfalert/analyze_db.py b/treeherder/perfalert/perfalert/analyze_db.py index 28452764f..e9c99c7f3 100644 --- a/treeherder/perfalert/perfalert/analyze_db.py +++ b/treeherder/perfalert/perfalert/analyze_db.py @@ -1,4 +1,3 @@ -import sys import sqlalchemy as sa from sqlalchemy.ext.sqlsoup import SqlSoup @@ -6,6 +5,9 @@ from analyze import PerfDatum from analyze_graphapi import TestSeries db = None +goodNameClause = None + + def connect(url): global db db = SqlSoup(url) @@ -13,6 +15,7 @@ def connect(url): global goodNameClause goodNameClause = db.machines.is_active == 1 + def getTestData(series, start_time): q = sa.select( [db.test_runs.machine_id, db.builds.ref_build_id, db.test_runs.date_run, db.test_runs.average, db.builds.ref_changeset, db.test_runs.run_number, db.builds.branch_id], @@ -37,6 +40,7 @@ def getTestData(series, start_time): data.append(d) return data + def getTestSeries(branches, start_date, test_names): # Find all the Branch/OS/Test combinations if len(test_names) > 0: @@ -56,7 +60,7 @@ def getTestSeries(branches, start_date, test_names): sa.not_(db.machines.name.like('%stage%')), sa.not_(db.tests.pretty_name.like("%NoChrome%")), sa.not_(db.tests.pretty_name.like("%Fast Cycle%")), - test_clause + test_clause, )) q = q.distinct() @@ -66,7 +70,10 @@ def getTestSeries(branches, start_date, test_names): retval.append(TestSeries(*row)) return retval + _machines_cache = {} + + def getMachinesForTest(series): key = (series.os_id, series.branch_id, series.test_id) if key in _machines_cache: @@ -87,7 +94,10 @@ def getMachinesForTest(series): _machines_cache[key] = [row[0] for row in result.fetchall()] return _machines_cache[key] + _name_cache = {} + + def getMachineName(machine_id): if machine_id in _name_cache: return _name_cache[machine_id] diff --git a/treeherder/perfalert/perfalert/analyze_graphapi.py b/treeherder/perfalert/perfalert/analyze_graphapi.py index 43877bb86..f2fc90564 100644 --- a/treeherder/perfalert/perfalert/analyze_graphapi.py +++ b/treeherder/perfalert/perfalert/analyze_graphapi.py @@ -4,10 +4,10 @@ try: except ImportError: import json -import urllib, os, sys - +import urllib from analyze import PerfDatum + class TestSeries: def __init__(self, branch_id, branch_name, os_id, os_name, test_id, test_name): self.branch_id = branch_id @@ -23,6 +23,7 @@ class TestSeries: def __hash__(self): return hash((self.branch_id, self.os_id, self.test_id)) + class GraphAPISource: def __init__(self, baseurl): self.baseurl = baseurl diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py index 63d0a1053..bf37d0960 100644 --- a/treeherder/perfalert/perfalert/analyze_talos.py +++ b/treeherder/perfalert/perfalert/analyze_talos.py @@ -1,5 +1,8 @@ from __future__ import with_statement -import time, urllib, re, os +import time +import urllib +import re +import os import logging as log import email.utils import threading @@ -12,9 +15,8 @@ except ImportError: from smtplib import SMTP from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText -import urllib +from analyze import TalosAnalyzer -from analyze import TalosAnalyzer, PerfDatum def shorten(url, login, apiKey, max_tries=10, sleep_time=30): params = { @@ -42,6 +44,7 @@ def shorten(url, login, apiKey, max_tries=10, sleep_time=30): else: raise ValueError("Unknown error: %s" % data) + def safe_shorten(url, login, apiKey): try: return shorten(url, login, apiKey) @@ -49,9 +52,11 @@ def safe_shorten(url, login, apiKey): log.exception("Unable to shorten url %s", url) return url + def avg(l): return sum(l) / float(len(l)) + def send_msg(fromaddr, subject, msg, addrs, html=None, headers={}): s = SMTP() s.connect() @@ -72,6 +77,7 @@ def send_msg(fromaddr, subject, msg, addrs, html=None, headers={}): s.sendmail(fromaddr, [addr], m.as_string()) s.quit() + class PushDater: def __init__(self, filename, base_url): self.filename = filename @@ -107,7 +113,7 @@ class PushDater: if len(to_query) > 0: log.debug("Fetching %i changesets", len(to_query)) for i in range(0, len(to_query), 50): - chunk = to_query[i:i+50] + chunk = to_query[i:i + 50] changesets = ["changeset=%s" % c for c in chunk] base_url = self.base_url url = "%s/%s/json-pushes?%s" % (base_url, repo_path, "&".join(changesets)) @@ -127,6 +133,7 @@ class PushDater: retval[changeset[:12]] = entry['date'] return retval + class AnalysisRunner: def __init__(self, options, config): self.options = options @@ -233,12 +240,12 @@ class AnalysisRunner: for machine_id in machine_ids: test_params.append((series.test_id, series.branch_id, machine_id)) - test_params = json.dumps(test_params, separators=(",",":")) + test_params = json.dumps(test_params, separators=(",", ":")) #test_params = urllib.quote(test_params) base_url = self.config.get('main', 'base_graph_url') if d is not None: - start_time = d.timestamp - 24*3600 - end_time = d.timestamp + 24*3600 + start_time = d.timestamp - 24 * 3600 + end_time = d.timestamp + 24 * 3600 return "%(base_url)s/graph.html#tests=%(test_params)s&sel=%(start_time)s,%(end_time)s" % locals() else: return "%(base_url)s/graph.html#tests=%(test_params)s" % locals() @@ -317,7 +324,7 @@ class AnalysisRunner: if state == "machine": reason = "Suspected machine issue (%s)" % bad_machine_name if not html: - msg = """\ + msg = """\ %(reason)s: %(test_name)s %(direction)s %(change).3g%% on %(os_name)s %(branch_name)s Previous results: %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s @@ -328,7 +335,7 @@ class AnalysisRunner: else: chart_url_encoded = xml.sax.saxutils.quoteattr(chart_url) hg_url_encoded = xml.sax.saxutils.quoteattr(hg_url) - msg = """\ + msg = """\

%(reason)s: %(test_name)s %(direction)s %(change).3g%% on %(os_name)s %(branch_name)s

Previous results: %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s

New results: %(new_value)s from build %(bad_build_id)s of %(bad_rev)s at %(bad_build_time)s on %(bad_machine_name)s

@@ -337,7 +344,7 @@ class AnalysisRunner: """ % locals() else: if not html: - msg = """\ + msg = """\ %(reason)s: %(test_name)s %(direction)s %(change).3g%% on %(os_name)s %(branch_name)s Previous results: %(initial_value)s from build %(good_build_id)s of %(good_rev)s at %(good_build_time)s on %(good_machine_name)s run # %(good_run_number)s @@ -431,7 +438,7 @@ class AnalysisRunner: continue if s.branch_name not in warnings: - warnings[s.branch_name] = {} + warnings[s.branch_name] = {} if s.os_name not in warnings[s.branch_name]: warnings[s.branch_name][s.os_name] = {} if s.test_name not in warnings[s.branch_name][s.os_name]: @@ -474,7 +481,7 @@ class AnalysisRunner: now = time.asctime() fp.write("// Generated at %s\n" % now) fp.write("gFetchTime = ") - json.dump(now, fp, separators=(',',':')) + json.dump(now, fp, separators=(',', ':')) fp.write(";\n") fp.write("var gData = ") # Hackity hack @@ -484,7 +491,7 @@ class AnalysisRunner: json.encoder.FLOAT_REPR = lambda f: "%.8g" % f except: pass - json.dump(self.dashboard_data, fp, separators=(',',':'), sort_keys=True) + json.dump(self.dashboard_data, fp, separators=(',', ':'), sort_keys=True) try: json.encoder.FLOAT_REPR = repr except: @@ -535,8 +542,8 @@ class AnalysisRunner: title = "Talos Regression Graph for %(test_name)s on %(os_name)s %(branch_name)s" % locals() - html = html_template % dict(graph_file = os.path.basename(graph_file), - title=title) + html = html_template % dict(graph_file=os.path.basename(graph_file), + title=title) if not os.path.exists(graph_dir): os.makedirs(graph_dir) # Copy in the rest of the HTML as well @@ -558,7 +565,7 @@ class AnalysisRunner: self.warning_history['inactive_machines'] = {} # Complain about anything that hasn't reported in 48 hours - cutoff = time.time() - 48*3600 + cutoff = time.time() - 48 * 3600 addresses = [] if self.config.has_option('main', 'machine_emails'): @@ -569,7 +576,7 @@ class AnalysisRunner: machine_name = self.source.getMachineName(machine_id) # When did we last warn about this machine? - if self.warning_history['inactive_machines'].get(machine_name, 0) < time.time() - 7*24*3600: + if self.warning_history['inactive_machines'].get(machine_name, 0) < time.time() - 7 * 24 * 3600: # If it was over a week ago, then send another warning self.warning_history['inactive_machines'][machine_name] = time.time() @@ -614,7 +621,7 @@ class AnalysisRunner: data = self.source.getTestData(s, options.start_time) # Add it to our dashboard data - sevenDaysAgo = time.time() - 7*24*60*60 + sevenDaysAgo = time.time() - 7 * 24 * 60 * 60 importantTests = [] for t in re.split(r"(? time.time() - 7*24*3600: + if self.warning_history['bad_machines'].get(machine_name, 0) > time.time() - 7 * 24 * 3600: skip = True else: # If it was over a week ago, then send another warning @@ -725,6 +732,7 @@ class AnalysisRunner: log.info("Fetching list of tests") series = self.source.getTestSeries(self.options.branches, self.options.start_time, self.options.tests) self.done = False + def runner(): while not self.done: try: @@ -793,16 +801,16 @@ if __name__ == "__main__": parser.add_option("", "--catchup", dest="catchup", action="store_true", help="Don't output any warnings, just process data") parser.set_defaults( - branches = [], - tests = [], - start_time = time.time() - 30*24*3600, - verbosity = log.INFO, - output = None, - json = None, - addresses = [], - machine_addresses = [], - config = "analysis.cfg", - catchup = False, + branches=[], + tests=[], + start_time=time.time() - 30 * 24 * 3600, + verbosity=log.INFO, + output=None, + json=None, + addresses=[], + machine_addresses=[], + config="analysis.cfg", + catchup=False, ) options, args = parser.parse_args() @@ -814,9 +822,9 @@ if __name__ == "__main__": config.read([options.config]) if options.addresses: - config.set('main', 'regression_emails', ",".join(option.addresses)) + config.set('main', 'regression_emails', ",".join(options.addresses)) if options.machine_addresses: - config.set('main', 'machine_emails', ",".join(option.machine_addresses)) + config.set('main', 'machine_emails', ",".join(options.machine_addresses)) runner = AnalysisRunner(options, config) try: From 6e98f20fabca3a6f14fd31d246d5cdaec77043ba Mon Sep 17 00:00:00 2001 From: Ian Bicking Date: Fri, 17 Sep 2010 14:36:12 -0400 Subject: [PATCH 2/3] Put in environmental variable substitution into the config files --HG-- branch : 1.0 --- treeherder/perfalert/perfalert/analyze_talos.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py index bf37d0960..a1a32d551 100644 --- a/treeherder/perfalert/perfalert/analyze_talos.py +++ b/treeherder/perfalert/perfalert/analyze_talos.py @@ -16,6 +16,7 @@ from smtplib import SMTP from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from analyze import TalosAnalyzer +from string import Template def shorten(url, login, apiKey, max_tries=10, sleep_time=30): @@ -767,7 +768,7 @@ class AnalysisRunner: except KeyboardInterrupt: print "Exiting..." self.done = True - + for t in threads: t.join() else: @@ -826,6 +827,16 @@ if __name__ == "__main__": if options.machine_addresses: config.set('main', 'machine_emails', ",".join(options.machine_addresses)) + vars = os.environ.copy() + vars['sys_prefix'] = sys.prefix + vars['here'] = os.path.dirname(__file__) + for section in config.sections(): + for option in config.options(section): + value = config.get(section, option) + if '$' in value: + value = Template(value).substitute(vars) + config.set(section, option, value) + runner = AnalysisRunner(options, config) try: runner.run() From 3c3b2e19c00cca67c4377875b2123f4a8f6aa729 Mon Sep 17 00:00:00 2001 From: Ian Bicking Date: Fri, 17 Sep 2010 15:27:20 -0400 Subject: [PATCH 3/3] fix typo s/option/options/ --HG-- branch : 1.0 --- treeherder/perfalert/perfalert/analyze_talos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/treeherder/perfalert/perfalert/analyze_talos.py b/treeherder/perfalert/perfalert/analyze_talos.py index 6fb22dc75..2bdf6deec 100644 --- a/treeherder/perfalert/perfalert/analyze_talos.py +++ b/treeherder/perfalert/perfalert/analyze_talos.py @@ -1059,9 +1059,9 @@ if __name__ == "__main__": config.read([options.config]) if options.addresses: - config.set('main', 'regression_emails', ",".join(option.addresses)) + config.set('main', 'regression_emails', ",".join(options.addresses)) if options.machine_addresses: - config.set('main', 'machine_emails', ",".join(option.machine_addresses)) + config.set('main', 'machine_emails', ",".join(options.machine_addresses)) vars = os.environ.copy() vars['sys_prefix'] = sys.prefix