from argparse import ArgumentParser
from collections import defaultdict
import json
import os
import sys

import requests

here = os.path.abspath(os.path.dirname(__file__))

ACTIVE_DATA_URL = "http://activedata.allizom.org/query"
PERCENTILE = 0.9  # ignore the bottom PERCENTILE*100% of numbers


def query_activedata(suite, platforms=None):
    platforms = ', "build.platform":%s' % json.dumps(platforms) if platforms else ''

    query = """
{
    "from":"unittest",
    "limit":200000,
    "groupby":["build.platform","result.test","build.type"],
    "select":{"value":"result.duration","aggregate":"average"},
    "where":{"and":[
        {"eq":{"suite":"%s"%s}},
        {"gt":{"run.timestamp":"{{today-week}}"}}
    ]}
}
""" % (suite, platforms)

    response = requests.post(ACTIVE_DATA_URL, data=query, stream=True)
    response.raise_for_status()
    data = response.json()["data"]
    return data


def write_runtimes(data, suite, indir=here, outdir=here):
    testdata = defaultdict(lambda: defaultdict(list))
    for result in data:
        # Each result is a list with four members: platform, test,
        # build type and duration.
        platform = result[0]
        buildtype = result[2]
        testdata[platform][buildtype].append([result[1], result[3]])

    for platform in testdata:
        for buildtype in testdata[platform]:
            print('processing %s-%s results' % (platform, buildtype))

            dirname = "%s-%s" % (platform, buildtype)
            out_path = os.path.join(outdir, dirname)
            in_path = os.path.join(indir, dirname)
            outfilename = os.path.join(out_path, "%s.runtimes.json" % suite)
            infilename = os.path.join(in_path, "%s.runtimes.json" % suite)
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            # read in existing data, if any
            indata = None
            if os.path.exists(infilename):
                with open(infilename, 'r') as f:
                    indata = json.loads(f.read()).get('runtimes')

            # identify a threshold of durations, below which we ignore
            runtimes = []
            for result in testdata[platform][buildtype]:
                # entries are [test, duration in seconds]; convert to milliseconds
                duration = int(result[1] * 1000) if result[1] else 0
                if duration:
                    runtimes.append(duration)
            runtimes.sort()
            threshold = runtimes[int(len(runtimes) * PERCENTILE)]

            # split the durations into two groups: omitted and specified
            omitted = []
            specified = indata if indata else {}
            current_tests = []

            for test, duration in testdata[platform][buildtype]:
                current_tests.append(test)
                duration = int(duration * 1000) if duration else 0
                if duration > 0 and duration < threshold:
                    omitted.append(duration)
                    if test in specified:
                        del specified[test]
                elif duration >= threshold and test != "automation.py":
                    original = specified.get(test, 0)
                    if not original or abs(original - duration) > (original / 20):
                        # only overwrite existing data if the new duration
                        # differs from the original by more than 5%
                        specified[test] = duration

            # delete any test references no longer needed
            to_delete = []
            for test in specified:
                if test not in current_tests:
                    to_delete.append(test)
            for test in to_delete:
                del specified[test]

            # guard against division by zero when no tests fall below the threshold
            avg = int(sum(omitted) / len(omitted)) if omitted else 0
            results = {'excluded_test_average': avg,
                       'runtimes': specified}

            with open(outfilename, 'w') as f:
                f.write(json.dumps(results, indent=2, sort_keys=True))


def cli(args=sys.argv[1:]):
    parser = ArgumentParser()
    parser.add_argument('-o', '--output-directory', dest='outdir', default=here,
                        help="Directory to save runtime data.")
    parser.add_argument('-i', '--input-directory', dest='indir', default=here,
                        help="Directory from which to read current runtime data.")
    parser.add_argument('-p', '--platforms', default=None,
                        help="Comma separated list of platforms from which to generate data.")
    parser.add_argument('-s', '--suite', dest='suite', default=None,
                        help="Suite for which to generate data.")
    args = parser.parse_args(args)
    if not args.suite:
        raise ValueError("Must specify suite with the -s argument")

    if ',' in args.suite:
        raise ValueError("Passing multiple suites is not supported")

    if args.platforms:
        args.platforms = args.platforms.split(',')

    data = query_activedata(args.suite, args.platforms)
    write_runtimes(data, args.suite, indir=args.indir, outdir=args.outdir)


if __name__ == "__main__":
    sys.exit(cli())
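
# Example invocation: the script filename, suite, and platform names below are
# illustrative placeholders only, not values defined anywhere in this file:
#
#   python <script>.py -s mochitest-browser-chrome -p linux64,windows10-64 -o runtimes/
#
# For each <platform>-<buildtype> combination returned by the ActiveData query,
# a <suite>.runtimes.json file is written under the output directory, shaped like:
#
#   {
#     "excluded_test_average": <average duration in ms of tests below the threshold>,
#     "runtimes": {"<test name>": <duration in ms>, ...}
#   }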