2015-07-01 01:34:09 +03:00
|
|
|
from argparse import ArgumentParser
|
|
|
|
from collections import defaultdict
|
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
# Directory containing this script; used as the default location for both
# reading existing runtime files and writing updated ones.
here = os.path.abspath(os.path.dirname(__file__))

# Public ActiveData query endpoint (Mozilla's test-result warehouse).
ACTIVE_DATA_URL = "http://activedata.allizom.org/query"

PERCENTILE = 0.9 # ignore the bottom PERCENTILE*100% of numbers
|
|
|
|
|
|
|
|
def query_activedata(suite, platforms=None):
    """Fetch average per-test durations for `suite` from ActiveData.

    Queries the last week of unittest results, grouped by platform, test
    and build type, and returns the raw "data" payload: a list of
    [platform, test, buildtype, duration] rows.

    :param suite: name of the suite to query.
    :param platforms: optional list of platform names used to restrict
                      the query; all platforms are included when None.
    :raises requests.HTTPError: if the service responds with an error.
    """
    # Extra clause spliced into the "eq" filter when platforms are given.
    if platforms:
        platforms = ', "build.platform":%s' % json.dumps(platforms)
    else:
        platforms = ''

    query = """
{
    "from":"unittest",
    "limit":200000,
    "groupby":["build.platform","result.test","build.type"],
    "select":{"value":"result.duration","aggregate":"average"},
    "where":{"and":[
        {"eq":{"suite":"%s"%s}},
        {"gt":{"run.timestamp":"{{today-week}}"}}
    ]}
}
""" % (suite, platforms)

    response = requests.post(ACTIVE_DATA_URL,
                             data=query,
                             stream=True)
    response.raise_for_status()
    return response.json()["data"]
|
|
|
|
|
2015-07-13 21:02:53 +03:00
|
|
|
def write_runtimes(data, suite, indir=here, outdir=here):
    """Write per-test runtime data for `suite` to per-platform JSON files.

    For each (platform, buildtype) pair seen in `data`, writes
    <outdir>/<platform>-<buildtype>/<suite>.runtimes.json containing the
    durations (in ms) of the slowest tests plus the average duration of
    the excluded (fast) tests.  Existing data found under `indir` is
    merged in, and stale test entries are dropped.

    :param data: list of [platform, test, buildtype, duration] rows, as
                 returned by query_activedata().
    :param suite: suite name, used to build the output filename.
    :param indir: directory holding pre-existing runtime files to merge.
    :param outdir: directory under which result folders/files are written.
    """
    # Group rows as testdata[platform][buildtype] -> [[test, duration], ...]
    testdata = defaultdict(lambda: defaultdict(list))
    for result in data:
        # Each result is a list with four members: platform,
        # test, build type, and duration.
        platform = result[0]
        buildtype = result[2]
        testdata[platform][buildtype].append([result[1], result[3]])

    for platform in testdata:
        for buildtype in testdata[platform]:
            # parenthesized print works under both Python 2 and 3
            print('processing %s-%s results' % (platform, buildtype))
            dirname = "%s-%s" % (platform, buildtype)
            out_path = os.path.join(outdir, dirname)
            in_path = os.path.join(indir, dirname)
            outfilename = os.path.join(out_path, "%s.runtimes.json" % suite)
            infilename = os.path.join(in_path, "%s.runtimes.json" % suite)
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            # read in existing data, if any
            indata = None
            if os.path.exists(infilename):
                with open(infilename, 'r') as f:
                    indata = json.loads(f.read()).get('runtimes')

            # identify a threshold of durations, below which we ignore
            runtimes = []
            for result in testdata[platform][buildtype]:
                # durations arrive in seconds (possibly None/0); store ms
                duration = int(result[1] * 1000) if result[1] else 0
                if duration:
                    runtimes.append(duration)
            runtimes.sort()
            # guard against an empty result set, which previously raised
            # an IndexError here
            threshold = runtimes[int(len(runtimes) * PERCENTILE)] if runtimes else 0

            # split the durations into two groups; omitted and specified
            omitted = []
            specified = indata if indata else {}
            current_tests = []
            for test, duration in testdata[platform][buildtype]:
                current_tests.append(test)
                duration = int(duration * 1000) if duration else 0
                if duration > 0 and duration < threshold:
                    omitted.append(duration)
                    # test dropped below the threshold; remove stale entry
                    if test in specified:
                        del specified[test]
                elif duration >= threshold and test != "automation.py":
                    original = specified.get(test, 0)
                    # only overwrite existing data when the new duration
                    # differs from it by more than 5% (original/20)
                    if not original or abs(original - duration) > (original / 20):
                        specified[test] = duration

            # delete any test references no longer needed
            to_delete = [test for test in specified if test not in current_tests]
            for test in to_delete:
                del specified[test]

            # average of the omitted (below-threshold) durations; 0 when
            # nothing fell below the threshold (previously ZeroDivisionError)
            avg = int(sum(omitted) / len(omitted)) if omitted else 0

            results = {'excluded_test_average': avg,
                       'runtimes': specified}

            with open(outfilename, 'w') as f:
                f.write(json.dumps(results, indent=2, sort_keys=True))
|
2015-07-01 01:34:09 +03:00
|
|
|
|
|
|
|
|
|
|
|
def cli(args=None):
    """Command line entry point: query ActiveData and write runtime files.

    :param args: argument list to parse; defaults to sys.argv[1:].
    :raises ValueError: if no suite is given, or more than one suite is.
    """
    if args is None:
        # evaluate sys.argv at call time, not at definition time
        args = sys.argv[1:]

    parser = ArgumentParser()
    parser.add_argument('-o', '--output-directory', dest='outdir',
                        default=here, help="Directory to save runtime data.")

    parser.add_argument('-i', '--input-directory', dest='indir',
                        default=here, help="Directory from which to read current runtime data.")

    parser.add_argument('-p', '--platforms', default=None,
                        help="Comma separated list of platforms from which to generate data.")

    parser.add_argument('-s', '--suite', dest='suite', default=None,
                        help="Suite for which to generate data.")

    args = parser.parse_args(args)

    if not args.suite:
        # message previously referred to a nonexistent "-u" flag
        raise ValueError("Must specify suite with the -s argument")
    if ',' in args.suite:
        raise ValueError("Passing multiple suites is not supported")

    if args.platforms:
        args.platforms = args.platforms.split(',')

    data = query_activedata(args.suite, args.platforms)
    write_runtimes(data, args.suite, indir=args.indir, outdir=args.outdir)
|
2015-07-01 01:34:09 +03:00
|
|
|
|
|
|
|
# Script entry point: propagate cli()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(cli())
|