Introducing a flexible test tagging system:

Each test is now tagged, so we can control what runs and when.
  Tests tagged with 'bvt-*' are run on every checkin job in a build lab
  Tests tagged with 'nightly-*' are run every night

The predicate mechanism embedded in the new tagging system allows a test to be tagged
conditionally, by specifying a Python expression as a predicate. This
allows us to selectively reduce the BVT test matrix and speed up execution of BVT runs in a lab.
This commit is contained in:
Vladimir Ivanov 2015-09-03 14:33:29 -07:00
Родитель d1488ac896
Коммит 45132447d9
5 изменённых файлов: 172 добавлений и 45 удалений

Просмотреть файл

@ -1,4 +1,10 @@
dataDir: ../../Data
# running on every BVT job in 'P' (Parallel) leg in Debug-GPU and Release-CPU configurations:
# Also skipping Release-CPU on Linux to save time (for now, for an unknown reason, it runs much longer there than on Windows)
- bvt-p ((flavor=='debug') ^ (device=='cpu')) and not (os=='linux' and device=='cpu')
# running unconditionally on every Nightly job in 'P' leg
- nightly-p
Must train epochs in exactly same order and parameters for each MPI Rank:

Просмотреть файл

@ -1,5 +1,11 @@
dataDir: ../../Data
# running on every BVT job in 'P' (Parallel) leg in Debug-GPU and Release-CPU configurations:
- bvt-p (flavor=='debug') ^ (device=='cpu')
# running unconditionally on every Nightly job in 'P' leg
- nightly-p
Must train epochs in exactly same order and parameters for each MPI Rank:

Просмотреть файл

@ -1,4 +1,9 @@
dataDir: ../Data
# running on every BVT job in 'L' (LSTM) leg in Debug-GPU and Release-CPU configurations:
- bvt-l (flavor=='debug') ^ (device=='cpu')
# running unconditionally on every Nightly job in 'L' leg
- nightly-l
CNTK Run must be completed:

Просмотреть файл

@ -1,4 +1,9 @@
dataDir: ../Data
# running on every BVT job in 'S' (Speech) leg in Debug-GPU and Release-CPU configurations:
- bvt-s (flavor=='debug') ^ (device=='cpu')
# running unconditionally on every Nightly job in 'S' leg
- nightly-s
CNTK Run must be completed:

Просмотреть файл

@ -20,6 +20,10 @@
# ----- testcases.yml format -------
# dataDir: <path> #<relative-path-to the data directory
# tags: # optional tags - see tagging system
# - <tag1> <optional-predicate>
# - <tag2> <optional-predicate>
# - ....
# testCases:
# <name of the testcase 1>:
@ -62,7 +66,18 @@
# 8. baseline.txt
# where <flavor> = { debug | release }
# <device> = { cpu | gpu }
# Baseline files are optional. They are only evaluated if the test defines one or more pattern-driven test cases.
# If no test cases are defined, then TestDriver uses the exit code of the run-test script as the only criterion
# of successful completion of the test.
# ----- Tagging system ------
# Unit tests can optionally be tagged with one or more tags.
# The CNTK build/test lab uses those tags to decide which tests to run during different flavors of build jobs (nightly, BVT, checkin).
# A tag can optionally be predicated with a Python boolean expression over the 'flavor' (debug/release), 'device' (cpu/gpu) and 'os' (windows/linux) variables.
# This allows restricting the tagging of a test to specific combinations of those variables.
# ----- Algorithm ------
# Baseline verification:
# For each testcase
@ -80,7 +95,7 @@
# matching against all test-cases/pattern simultaneously
import sys, os, argparse, traceback, yaml, subprocess, random, re, time
import sys, os, argparse, traceback, yaml, subprocess, random, re, time, sets
thisDir = os.path.dirname(os.path.realpath(__file__))
windows = os.getenv("OS")=="Windows_NT"
@ -97,8 +112,10 @@ class Test:
self.suite = suite = name
self.fullName = suite + "/" + name
# computing location of test directory (yml file directory)
self.testDir = os.path.dirname(pathToYmlFile)
# parsing yml file with testcases
with open(pathToYmlFile, "r") as f:
self.rawYamlData = yaml.safe_load(
@ -109,14 +126,42 @@ class Test:
self.dataDir = self.testDir
testCasesYaml = self.rawYamlData["testCases"]
# parsing test cases
self.testCases = []
for name in testCasesYaml.keys():
self.testCases.append(TestCase(name, testCasesYaml[name]))
except Exception as e:
print >>sys.stderr, "ERROR registering test case: " + name
if "testCases" in self.rawYamlData.keys():
testCasesYaml = self.rawYamlData["testCases"]
for name in testCasesYaml.keys():
self.testCases.append(TestCase(name, testCasesYaml[name]))
except Exception as e:
print >>sys.stderr, "ERROR registering test case: " + name
# parsing all tags, example input:
# tags:
# - bvt-l (flavor=='debug') ^ (device=='cpu') # tag with a python predicate expression
# - nightly-l #tag without a predicate
# Predicate expressions must produce boolean value and may refer to following variables: flavor, device, os
self.tags = {}
if self.rawYamlData["tags"]:
for tagLine in self.rawYamlData["tags"]:
tagLineSplit = tagLine.split(' ', 1) # splitting tag name from predicate expression
tagName = tagLineSplit[0].lower().strip()
# using specified python expression (or 'True' if former isn't provided)
pythonExpr = tagLineSplit[1] if len(tagLineSplit)==2 else "True"
# converting python expression into lambda and doing a smoke test by calling it with dummy parameters
predicate = lambda pythonExpr=pythonExpr, **kwargs: eval(pythonExpr, kwargs)
assert(type(predicate(flavor='foo', device='var', os='foobar')) == bool)
except Exception as e:
print "Can't parse tag predicate expression in {0} ({1}):\n{2}".format(pathToYmlFile, pythonExpr, e)
raise e
# saving generated lambda into tags dictionary
self.tags[tagName] = predicate
# Populates Tests.allTestsIndexedByFullName by scanning directory tree
# and finding all testcases.yml files
@ -150,27 +195,30 @@ class Test:
return result
def runImpl(self, flavor, device, args):
# Locating and reading baseline file
baselineFile = self.findBaselineFile(flavor, device)
if baselineFile == None:
return TestRunResult.fatalError("Baseline file sanity check", "Can't find baseline file")
result = TestRunResult()
result.succeeded = True
with open(baselineFile, "r") as f:
baseline ="\n")
if args.verbose:
print "Baseline:", baselineFile
# Preparation for pattern-based test cases
if len(self.testCases) > 0:
# Locating and reading baseline file
baselineFile = self.findBaselineFile(flavor, device)
if baselineFile == None:
return TestRunResult.fatalError("Baseline file sanity check", "Can't find baseline file")
with open(baselineFile, "r") as f:
baseline ="\n")
if args.verbose:
print "Baseline:", baselineFile
# Before running the test, pre-creating TestCaseRunResult object for each test case
# and compute filtered lines from baseline file.
# Note: some test cases might fail at this time if baseline and/or patterns are inconsistent
result = TestRunResult()
result.succeeded = True
if not args.update_baseline:
for testCase in self.testCases:
testCaseRunResult = testCase.processBaseline(baseline)
if not testCaseRunResult.succeeded:
result.succeeded = False
if not args.update_baseline:
for testCase in self.testCases:
testCaseRunResult = testCase.processBaseline(baseline)
if not testCaseRunResult.succeeded:
result.succeeded = False
# preparing run directory
runDir = os.path.join(args.run_dir, "{0}_{1}@{2}_{3}".format(self.suite,, flavor, device))
@ -236,7 +284,7 @@ class Test:
if not testCaseRunResult.succeeded:
result.succeeded = False
if args.update_baseline and result.succeeded:
if (self.testCases)>0 and args.update_baseline and result.succeeded:
# When running in --update-baseline mode
# verifying that new output is successfully matching every pattern in the testcases.yml
# If this is not the case then baseline update will be rejected
@ -273,6 +321,20 @@ class Test:
return fullPath
return None
# Checks whether the test matches the specified tag.
# Parameters:
#   tag    - tag (or tag prefix) to look for; compared case-insensitively
#   flavor, device, os - values forwarded as keyword arguments to the tag's predicate
#            (note: 'os' shadows the 'os' module inside this method; the name is kept
#            because it is part of the method's interface and the predicate's keyword)
# Returns the name of the matched tag on success, or None if there is no match.
def matchesTag(self, tag, flavor, device, os):
    # normalizing the requested tag for comparison (tag names in self.tags are stored lower-cased)
    wanted = tag.lower()
    prefix = wanted + "-"
    # enumerating all the tags in sorted order, so that the returned tag is deterministic
    # when several tags match (plain dictionary order is arbitrary); an exact match
    # sorts before any of its '<tag>-*' variants and is therefore tried first
    for candidate in sorted(self.tags):
        # match by direct string comparison or by prefix matching rule:
        # e.g: 'bvt' matches 'bvt' 'bvt-a', 'bvt-b' but not 'bvtx'
        if candidate == wanted or candidate.startswith(prefix):
            # evaluating tag's predicate for the requested configuration
            if self.tags[candidate](flavor=flavor, device=device, os=os):
                return candidate
    return None
# This class encapsulates one testcase (in testcases.yml file)
class TestCase:
def __init__(self, name, yamlNode):
@ -451,8 +513,22 @@ class TestCaseRunResult:
# Lists all available tests
def listCommand(args):
for t in Test.allTestsIndexedByFullName.values():
print t.fullName
testsByTag = {}
for test in Test.allTestsIndexedByFullName.values():
for flavor in args.flavors:
for device in args.devices:
for os in args.oses:
tag = test.matchesTag(args.tag, flavor, device, os) if args.tag else '*'
if tag:
if tag in testsByTag.keys():
testsByTag[tag] = sets.Set([test.fullName])
for tag in sorted(testsByTag.keys()):
if tag=="*":
print ' '.join(sorted(testsByTag[tag]))
print tag+":", ' '.join(sorted(testsByTag[tag]))
# Runs given test(s) or all tests
def runCommand(args):
@ -466,21 +542,9 @@ def runCommand(args):
return 1
testsToRun = Test.allTestsIndexedByFullName.values()
devices = ["cpu", "gpu"]
if (args.device):
args.device = args.device.lower()
if not args.device in devices:
print >>sys.stderr, "--device must be one of", devices
return 1
devices = [args.device]
flavors = ["debug", "release"]
if (args.flavor):
args.flavor = args.flavor.lower()
if not args.flavor in flavors:
print >>sys.stderr, "--flavor must be one of", flavors
return 1
flavors = [args.flavor]
devices = args.devices
flavors = args.flavors
print "CNTK Test Driver is started"
print "Running tests: ", " ".join([y.fullName for y in testsToRun])
@ -495,16 +559,24 @@ def runCommand(args):
for test in testsToRun:
for flavor in flavors:
for device in devices:
if args.tag and args.tag != '' and not test.matchesTag(args.tag, flavor, device, 'windows' if windows else 'linux'):
totalCount = totalCount + 1
if len(test.testCases)==0:
# forcing verbose mode (showing all output) for all test which are based on exit code (no pattern-based test cases)
args.verbose = True
# Printing the test which is about to run (without terminating the line)
sys.stdout.write("Running test {0} ({1} {2}) - ".format(test.fullName, flavor, device));
if args.dry_run:
print "[SKIPPED] (dry-run)"
# in verbose mode, terminate the line, since there will be a lot of output
if args.verbose:
# Running the test and collecting a run results
result =, device, args)
if args.verbose:
# writing the test name one more time (after possibly long verbose output)
sys.stdout.write("Test finished {0} ({1} {2}) - ".format(test.fullName, flavor, device));
@ -549,17 +621,24 @@ runSubparser.add_argument("test", nargs="*",
defaultBuildLocation=os.path.realpath(os.path.join(thisDir, "..", "x64" if windows else "build"))
runSubparser.add_argument("-b", "--build-location", default=defaultBuildLocation, help="location of the CNTK build to run")
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specific device")
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specific flavor")
runSubparser.add_argument("-t", "--tag", help="runs tests which match the spacified tag")
runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device")
runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor")
tmpDir = os.getenv("TEMP") if windows else "/tmp"
defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000)))
runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp")
runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them")
runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script")
runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and condfigurations to be run")
listSubparser = subparsers.add_parser("list", help="list available tests")
listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the spacified tag")
listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device")
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor")
listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system")
if len(sys.argv)==1:
@ -568,6 +647,32 @@ if len(sys.argv)==1:
args = parser.parse_args(sys.argv[1:])
# parsing a --device, --flavor and --os options:
args.devices = ["cpu", "gpu"]
if (args.device):
args.device = args.device.lower()
if not args.device in args.devices:
print >>sys.stderr, "--device must be one of", args.devices
args.devices = [args.device]
args.flavors = ["debug", "release"]
if (args.flavor):
args.flavor = args.flavor.lower()
if not args.flavor in args.flavors:
print >>sys.stderr, "--flavor must be one of", args.flavors
args.flavors = [args.flavor]
if args.func == listCommand:
args.oses = ["windows", "linux"]
if (args.os):
args.os = args.os.lower()
if not args.os in args.oses:
print >>sys.stderr, "--os must be one of", args.oses
args.oses = [args.os]
# discover all the tests