From 7f0460ef5f3044e273f572d0f30722884624c757 Mon Sep 17 00:00:00 2001
From: Vladimir Ivanov
Date: Thu, 3 Sep 2015 14:33:29 -0700
Subject: [PATCH] Introducing a flexible test tagging system

Each test is now tagged, so we can control what runs and when:
 - tests tagged with 'bvt-*' are run by every check-in job in the build lab
 - tests tagged with 'nightly-*' are run every night

A predicate mechanism embedded in the new tagging system allows tagging a test
conditionally, by specifying a Python expression as the predicate. This lets us
selectively reduce the BVT test matrix and speed up BVT runs in the lab.
---
 .../DoublePrecision/testcases.yml   |   6 +
 .../SinglePrecision/testcases.yml   |   6 +
 Tests/Speech/LSTM/testcases.yml     |   5 +
 Tests/Speech/QuickE2E/testcases.yml |   5 +
 Tests/TestDriver.py                 | 195 ++++++++++++++----
 5 files changed, 172 insertions(+), 45 deletions(-)

diff --git a/Tests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml b/Tests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml
index b73a1e67d..733c6ab5c 100644
--- a/Tests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml
+++ b/Tests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml
@@ -1,4 +1,10 @@
 dataDir: ../../Data
+tags:
+  # running on every BVT job in the 'P' (Parallel) leg, in Debug-GPU and Release-CPU configurations.
+  # Also skipping Release-CPU on Linux to save time (for now; for an unknown reason it runs much longer than on Windows)
+  - bvt-p ((flavor=='debug') ^ (device=='cpu')) and not (os=='linux' and device=='cpu')
+  # running unconditionally on every Nightly job in the 'P' leg
+  - nightly-p
 
 testCases:
   Must train epochs in exactly same order and parameters for each MPI Rank:

diff --git a/Tests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml b/Tests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml
index f91121dc0..42c8305f5 100644
--- a/Tests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml
+++ b/Tests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml
@@ -1,5 +1,11 @@
 dataDir: ../../Data
+tags:
+  # running on every BVT job in the 'P' (Parallel) leg, in Debug-GPU and Release-CPU configurations:
+  - bvt-p (flavor=='debug') ^ (device=='cpu')
+  # running unconditionally on every Nightly job in the 'P' leg
+  - nightly-p
+
 testCases:
   Must train epochs in exactly same order and parameters for each MPI Rank:
     patterns:

diff --git a/Tests/Speech/LSTM/testcases.yml b/Tests/Speech/LSTM/testcases.yml
index 070350e56..ed393be88 100644
--- a/Tests/Speech/LSTM/testcases.yml
+++ b/Tests/Speech/LSTM/testcases.yml
@@ -1,4 +1,9 @@
 dataDir: ../Data
+tags:
+  # running on every BVT job in the 'L' (LSTM) leg, in Debug-GPU and Release-CPU configurations:
+  - bvt-l (flavor=='debug') ^ (device=='cpu')
+  # running unconditionally on every Nightly job in the 'L' leg
+  - nightly-l
 
 testCases:
   CNTK Run must be completed:

diff --git a/Tests/Speech/QuickE2E/testcases.yml b/Tests/Speech/QuickE2E/testcases.yml
index ef22d550e..3455d5488 100644
--- a/Tests/Speech/QuickE2E/testcases.yml
+++ b/Tests/Speech/QuickE2E/testcases.yml
@@ -1,4 +1,9 @@
 dataDir: ../Data
+tags:
+  # running on every BVT job in the 'S' (Speech) leg, in Debug-GPU and Release-CPU configurations:
+  - bvt-s (flavor=='debug') ^ (device=='cpu')
+  # running unconditionally on every Nightly job in the 'S' leg
+  - nightly-s
 
 testCases:
   CNTK Run must be completed:

diff --git a/Tests/TestDriver.py b/Tests/TestDriver.py
index 6c119d074..fd4cee1d2 100755
--- a/Tests/TestDriver.py
+++ b/Tests/TestDriver.py
@@ -20,6 +20,10 @@
 #
 #
 # ----- testcases.yml format -------
 # dataDir: <path to the test data directory>
+# tags: # optional list of tags, one per line:
+#   - <tag name> <optional Python predicate expression>
+#   - ....
+#
 # testCases:
 #   <name of the test case>:
@@ -62,7 +66,18 @@
 #   8. baseline.txt
 # where <flavor> = { debug | release }
 #       <device> = { cpu | gpu }
-#
+#
+# Baseline files are optional. They are only evaluated if the test defines one or more pattern-driven test cases.
+# If no test cases are defined, then the TestDriver uses the exit code of the run-test script as the only criterion
+# of successful completion of the test.
+
+# ----- Tagging system ------
+# Tests can optionally be tagged with one or more tags.
+# The CNTK build/test lab uses these tags to decide which tests to run for different kinds of build jobs (nightly, BVT, check-in).
+#
+# A tag can optionally be predicated with a Python boolean expression over the 'flavor' (debug/release), 'device' (cpu/gpu), 'os' (windows/linux) variables.
+# This allows restricting the tag to specific combinations of those variables.
+#
 # ----- Algorithm ------
 # Baseline verification:
 #   For each testcase
@@ -80,7 +95,7 @@
 # matching against all test-cases/pattern simulteneously
 #
-import sys, os, argparse, traceback, yaml, subprocess, random, re, time
+import sys, os, argparse, traceback, yaml, subprocess, random, re, time, sets
 
 thisDir = os.path.dirname(os.path.realpath(__file__))
 windows = os.getenv("OS")=="Windows_NT"
 
@@ -97,8 +112,10 @@ class Test:
     self.suite = suite
     self.name = name
     self.fullName = suite + "/" + name
+    # computing location of the test directory (yml file directory)
     self.testDir = os.path.dirname(pathToYmlFile)
+    # parsing the yml file with test cases
     with open(pathToYmlFile, "r") as f:
       self.rawYamlData = yaml.safe_load(f.read())
 
@@ -109,14 +126,42 @@ class Test:
     else:
       self.dataDir = self.testDir
 
-    testCasesYaml = self.rawYamlData["testCases"]
+    # parsing test cases
     self.testCases = []
-    for name in testCasesYaml.keys():
-      try:
-        self.testCases.append(TestCase(name, testCasesYaml[name]))
-      except Exception as e:
-        print >>sys.stderr, "ERROR registering test case: " + name
-        raise
+    if "testCases" in self.rawYamlData.keys():
+      testCasesYaml = self.rawYamlData["testCases"]
+      for name in testCasesYaml.keys():
+        try:
+          self.testCases.append(TestCase(name, testCasesYaml[name]))
+        except Exception as e:
+          print >>sys.stderr, "ERROR registering test case: " + name
+          raise
+
+    # parsing all tags; example input:
+    #   tags:
+    #     - bvt-l (flavor=='debug') ^ (device=='cpu')  # tag with a Python predicate expression
+    #     - nightly-l                                  # tag without a predicate
+    #
+    # Predicate expressions must produce a boolean value and may refer to the following variables: flavor, device, os
+    self.tags = {}
+    if self.rawYamlData.get("tags"):
+      for tagLine in self.rawYamlData["tags"]:
+        tagLineSplit = tagLine.split(' ', 1) # splitting the tag name from the predicate expression
+        tagName = tagLineSplit[0].lower().strip()
+
+        # using the specified python expression (or 'True' if none is provided)
+        pythonExpr = tagLineSplit[1] if len(tagLineSplit)==2 else "True"
+
+        # converting the python expression into a lambda and doing a smoke test by calling it with dummy parameters
+        predicate = lambda pythonExpr=pythonExpr, **kwargs: eval(pythonExpr, kwargs)
+        try:
+          assert(type(predicate(flavor='foo', device='bar', os='foobar')) == bool)
+        except Exception as e:
+          print "Can't parse tag predicate expression in {0} ({1}):\n{2}".format(pathToYmlFile, pythonExpr, e)
+          raise e
+
+        # saving the generated lambda into the tags dictionary
+        self.tags[tagName] = predicate
 
 # Populates Tests.allTestsIndexedByFullName by scanning directory tree
 # and finding all testcases.yml files
@@ -150,27 +195,30 @@ class Test:
     return result
 
   def runImpl(self, flavor, device, args):
-    # Locating and reading baseline file
-    baselineFile = self.findBaselineFile(flavor, device)
-    if baselineFile == None:
-      return TestRunResult.fatalError("Baseline file sanity check", "Can't find baseline file")
+    result = TestRunResult()
+    result.succeeded = True
 
-    with open(baselineFile, "r") as f:
-      baseline = f.read().split("\n")
-      if args.verbose:
-        print "Baseline:", baselineFile
+    # Preparation for pattern-based test cases
+    if len(self.testCases) > 0:
+      # Locating and reading baseline file
+      baselineFile = self.findBaselineFile(flavor, device)
+      if baselineFile == None:
+        return TestRunResult.fatalError("Baseline file sanity check", "Can't find baseline file")
+
+      with open(baselineFile, "r") as f:
+        baseline = f.read().split("\n")
+        if args.verbose:
+          print "Baseline:", baselineFile
 
     # Before running the test, pre-creating TestCaseRunResult object for each test case
     # and compute filtered lines from baseline file.
     # Note: some test cases might fail at this time if baseline and/or patterns are inconsistant
-    result = TestRunResult()
-    result.succeeded = True
-    if not args.update_baseline:
-      for testCase in self.testCases:
-        testCaseRunResult = testCase.processBaseline(baseline)
-        if not testCaseRunResult.succeeded:
-          result.succeeded = False
-        result.testCaseRunResults.append(testCaseRunResult)
+      if not args.update_baseline:
+        for testCase in self.testCases:
+          testCaseRunResult = testCase.processBaseline(baseline)
+          if not testCaseRunResult.succeeded:
+            result.succeeded = False
+          result.testCaseRunResults.append(testCaseRunResult)
 
     # preparing run directory
     runDir = os.path.join(args.run_dir, "{0}_{1}@{2}_{3}".format(self.suite, self.name, flavor, device))
@@ -236,7 +284,7 @@ class Test:
       if not testCaseRunResult.succeeded:
        result.succeeded = False
 
-    if args.update_baseline and result.succeeded:
+    if len(self.testCases) > 0 and args.update_baseline and result.succeeded:
       # When running in --update-baseline mode
       # verifying that new output is succesfully matching every pattern in the testcases.yml
       # If this is not the case then baseline update will be rejected
@@ -273,6 +321,20 @@ class Test:
        return fullPath
     return None
 
+  # Checks whether the test matches the specified tag;
+  # returns the matched tag name on success, or None if there is no match
+  def matchesTag(self, tag, flavor, device, os):
+    tagL = tag.lower() # normalizing the tag for comparison
+    # enumerating all the tags
+    for tag in self.tags.keys():
+      # match by direct string comparison or by the prefix matching rule:
+      # e.g. 'bvt' matches 'bvt', 'bvt-a', 'bvt-b' but not 'bvtx'
+      if tag==tagL or tag.startswith(tagL + "-"):
+        # evaluating the tag's predicate
+        if self.tags[tag](flavor=flavor, device=device, os=os):
+          return tag
+    return None
+
 # This class encapsulates one testcase (in testcases.yml file)
 class TestCase:
   def __init__(self, name, yamlNode):
@@ -451,8 +513,22 @@ class TestCaseRunResult:
 # Lists all available tests
 def listCommand(args):
-  for t in Test.allTestsIndexedByFullName.values():
-    print t.fullName
+  testsByTag = {}
+  for test in Test.allTestsIndexedByFullName.values():
+    for flavor in args.flavors:
+      for device in args.devices:
+        for os in args.oses:
+          tag = test.matchesTag(args.tag, flavor, device, os) if args.tag else '*'
+          if tag:
+            if tag in testsByTag.keys():
+              testsByTag[tag].add(test.fullName)
+            else:
+              testsByTag[tag] = sets.Set([test.fullName])
+  for tag in sorted(testsByTag.keys()):
+    if tag=="*":
+      print ' '.join(sorted(testsByTag[tag]))
+    else:
+      print tag+":", ' '.join(sorted(testsByTag[tag]))
 
 # Runs given test(s) or all tests
 def runCommand(args):
@@ -466,21 +542,9 @@ def runCommand(args):
       return 1
   else:
     testsToRun = Test.allTestsIndexedByFullName.values()
-  devices = ["cpu", "gpu"]
-  if (args.device):
-    args.device = args.device.lower()
-    if not args.device in devices:
-      print >>sys.stderr, "--device must be one of", devices
-      return 1
-    devices = [args.device]
-  flavors = ["debug", "release"]
-  if (args.flavor):
-    args.flavor = args.flavor.lower()
-    if not args.flavor in flavors:
-      print >>sys.stderr, "--flavor must be one of", flavors
-      return 1
-    flavors = [args.flavor]
+  devices = args.devices
+  flavors = args.flavors
 
   print "CNTK Test Driver is started"
   print "Running tests: ", " ".join([y.fullName for y in testsToRun])
@@ -495,16 +559,24 @@ def runCommand(args):
   for test in testsToRun:
     for flavor in flavors:
       for device in devices:
+        if args.tag and args.tag != '' and not test.matchesTag(args.tag, flavor, device, 'windows' if windows else 'linux'):
+          continue
        totalCount = totalCount + 1
+        if len(test.testCases)==0:
+          # forcing verbose mode (showing all output) for all tests which are based on exit code only (no pattern-based test cases)
+          args.verbose = True
        # Printing the test which is about to run (without terminating the line)
        sys.stdout.write("Running test {0} ({1} {2}) - ".format(test.fullName, flavor, device));
+        if args.dry_run:
+          print "[SKIPPED] (dry-run)"
+          continue
        # in verbose mode, terminate the line, since there will be a lot of output
        if args.verbose:
          sys.stdout.write("\n");
        sys.stdout.flush()
        # Running the test and collecting a run results
        result = test.run(flavor, device, args)
-
+
        if args.verbose:
          # writing the test name one more time (after possibly long verbose output)
          sys.stdout.write("Test finished {0} ({1} {2}) - ".format(test.fullName, flavor, device));
@@ -549,17 +621,24 @@ runSubparser.add_argument("test", nargs="*",
 
 defaultBuildLocation=os.path.realpath(os.path.join(thisDir, "..", "x64" if windows else "build"))
 
 runSubparser.add_argument("-b", "--build-location", default=defaultBuildLocation, help="location of the CNTK build to run")
-runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specific device")
-runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specific flavor")
+runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag")
+runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
+runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
 
 tmpDir = os.getenv("TEMP") if windows else "/tmp"
 defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
 runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
 runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
 runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
+runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run")
 
 runSubparser.set_defaults(func=runCommand)
 
 listSubparser = subparsers.add_parser("list", help="list available tests")
+listSubparser.add_argument("-t", "--tag", help="limits the resulting list to tests matching the specified tag")
+listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
+listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for a specified flavor")
+listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
+
 listSubparser.set_defaults(func=listCommand)
 
 if len(sys.argv)==1:
@@ -568,6 +647,32 @@ if len(sys.argv)==1:
 
 args = parser.parse_args(sys.argv[1:])
 
+# parsing the --device, --flavor and --os options:
+args.devices = ["cpu", "gpu"]
+if (args.device):
+  args.device = args.device.lower()
+  if not args.device in args.devices:
+    print >>sys.stderr, "--device must be one of", args.devices
+    sys.exit(1)
+  args.devices = [args.device]
+
+args.flavors = ["debug", "release"]
+if (args.flavor):
+  args.flavor = args.flavor.lower()
+  if not args.flavor in args.flavors:
+    print >>sys.stderr, "--flavor must be one of", args.flavors
+    sys.exit(1)
+  args.flavors = [args.flavor]
+
+if args.func == listCommand:
+  args.oses = ["windows", "linux"]
+  if (args.os):
+    args.os = args.os.lower()
+    if not args.os in args.oses:
+      print >>sys.stderr, "--os must be one of", args.oses
+      sys.exit(1)
+    args.oses = [args.os]
+
 # discover all the tests
 Test.discoverAllTests()
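
For reference, a minimal standalone sketch (not part of the patch) of how a tag line from testcases.yml is split into a tag name plus an optional predicate, and how a requested tag is then matched for a concrete configuration. It mirrors the lambda/eval approach used in Test.__init__ and Test.matchesTag above; parse_tag_line and tag_matches are illustrative names only, not functions in TestDriver.py.

def parse_tag_line(tag_line):
  # "bvt-l (flavor=='debug') ^ (device=='cpu')" -> ("bvt-l", <predicate>)
  # "nightly-l"                                 -> ("nightly-l", always-true predicate)
  parts = tag_line.split(' ', 1)
  tag_name = parts[0].lower().strip()
  python_expr = parts[1] if len(parts) == 2 else "True"
  # the predicate may refer to 'flavor', 'device' and 'os', passed in as keyword arguments
  return tag_name, lambda python_expr=python_expr, **kwargs: eval(python_expr, kwargs)

def tag_matches(requested_tag, tag_name, predicate, flavor, device, os):
  # exact match or prefix match: 'bvt' matches 'bvt' and 'bvt-l', but not 'bvtx';
  # the predicate then decides for the concrete (flavor, device, os) combination
  requested = requested_tag.lower()
  if tag_name != requested and not tag_name.startswith(requested + "-"):
    return False
  return predicate(flavor=flavor, device=device, os=os)

if __name__ == "__main__":
  name, pred = parse_tag_line("bvt-l (flavor=='debug') ^ (device=='cpu')")
  print(tag_matches("bvt", name, pred, flavor="debug", device="gpu", os="windows"))    # True
  print(tag_matches("bvt", name, pred, flavor="release", device="gpu", os="linux"))    # False
  print(tag_matches("nightly", name, pred, flavor="debug", device="gpu", os="linux"))  # False (name does not match)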