diff --git a/Source/CNTK/CNTK.cpp b/Source/CNTK/CNTK.cpp index 714837c49..7e2083cc2 100644 --- a/Source/CNTK/CNTK.cpp +++ b/Source/CNTK/CNTK.cpp @@ -381,13 +381,13 @@ void PrintGpuInfo() } LOGPRINTF(stderr, "-------------------------------------------------------------------\n"); - LOGPRINTF(stderr, "GPU info: \n\n"); + LOGPRINTF(stderr, "GPU info:\n\n"); for (GpuData& data : gpusData) { - LOGPRINTF(stderr, "\t\tDevice ID: %d\n", data.deviceId); - LOGPRINTF(stderr, "\t\tCompute Capability: %d.%d\n", data.major, data.minor); - LOGPRINTF(stderr, "\t\tCUDA cores: %d\n", data.cudaCores); + LOGPRINTF(stderr, "\t\tDevice ID: %d\n", data.m_deviceId); + LOGPRINTF(stderr, "\t\tCompute Capability: %d.%d\n", data.m_major, data.m_minor); + LOGPRINTF(stderr, "\t\tCUDA cores: %d\n", data.m_cudaCores); } LOGPRINTF(stderr, "-------------------------------------------------------------------\n"); #endif @@ -522,7 +522,7 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp // echo config info to log PrintBuiltInfo(); - //echo gpu info to log + // echo gpu info to log PrintGpuInfo(); // execute the actions @@ -713,7 +713,6 @@ int wmain1(int argc, wchar_t* argv[]) // called from wmain which is a wrapper th try { PrintBuiltInfo(); // print build info directly in case that user provides zero argument (convenient for checking build type) - PrintGpuInfo(); if (argc <= 1) { diff --git a/Source/Common/BestGpu.cpp b/Source/Common/BestGpu.cpp index a833c6c65..1009783a2 100644 --- a/Source/Common/BestGpu.cpp +++ b/Source/Common/BestGpu.cpp @@ -541,18 +541,19 @@ std::vector GetGpusData() for (ProcessorData* pd : processorData) { GpuData gpuData; - gpuData.major = pd->deviceProp.major; - gpuData.minor = pd->deviceProp.minor; - gpuData.cudaCores = pd->cores; - gpuData.deviceId = pd->deviceId; + gpuData.m_major = pd->deviceProp.major; + gpuData.m_minor = pd->deviceProp.minor; + gpuData.m_cudaCores = pd->cores; + gpuData.m_deviceId = pd->deviceId; 
data.push_back(gpuData); } } - return std::move(data); + return data; } -std::vector<ProcessorData*> BestGpu::GetProcessorData(){ +std::vector<ProcessorData*> BestGpu::GetProcessorData() +{ return m_procData; } diff --git a/Source/Common/Include/BestGpu.h b/Source/Common/Include/BestGpu.h index 1be1fcac5..3921b2d8c 100644 --- a/Source/Common/Include/BestGpu.h +++ b/Source/Common/Include/BestGpu.h @@ -16,11 +16,12 @@ namespace Microsoft { namespace MSR { namespace ScriptableObjects { struct IConf namespace Microsoft { namespace MSR { namespace CNTK { #ifndef CPUONLY -struct GpuData{ - int major; - int minor; - int deviceId; - int cudaCores; +struct GpuData +{ + int m_major; + int m_minor; + int m_deviceId; + int m_cudaCores; }; std::vector<GpuData> GetGpusData(); diff --git a/Tests/EndToEndTests/MetricsDriver.py b/Tests/EndToEndTests/MetricsDriver.py index b0acb65bf..82623a1da 100644 --- a/Tests/EndToEndTests/MetricsDriver.py +++ b/Tests/EndToEndTests/MetricsDriver.py @@ -7,6 +7,7 @@ # and generates a markdown file (wiki page) import sys, os, csv, traceback, re +import TestDriver as td try: import six @@ -17,17 +18,6 @@ except ImportError: thisDir = os.path.dirname(os.path.realpath(__file__)) windows = os.getenv("OS")=="Windows_NT" -def cygpath(path, relative=False): - if windows: - if path.startswith('/'): - return path - path = os.path.abspath(path) - if not relative and path[1]==':': # Windows drive - path = '/cygdrive/' + path[0] + path[2:] - path = path.replace('\\','/') - - return path - class Baseline: def __init__(self, fullPath, opSystem, device, flavor, testResult = "", trainResult = ""): self.fullPath = fullPath @@ -40,36 +30,38 @@ class Baseline: self.trainResult = trainResult def getResultsInfo(self, baselineContent): - trainResults = re.findall('.*(Finished Epoch\[[ ]*\d+ of \d+\]\: \[Training\]) (.*)', baselineContent, re.MULTILINE) + trainResults = re.findall('.*(Finished Epoch\[[ ]*\d+ of \d+\]\: \[Training\]) (.*)', baselineContent) if trainResults: self.trainResult = 
Baseline.getLastTrainResult(trainResults[-1])[0:-2] - testResults = re.findall('.*(Final Results: Minibatch\[1-\d+\]:(\s+\* \d+|))\s+(.*)', baselineContent, re.MULTILINE) + testResults = re.findall('.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+|)?\s+(.*)', baselineContent) if testResults: self.testResult = Baseline.getLastTestResult(testResults[-1])[0:-2] def getHardwareInfo(self, baselineContent): - cpuInfo = re.search(".*Hardware info:\s+" - "CPU Model (Name:\s*.*)\s+" - "CPU (Cores:\s*.*)\s+" - "(Hardware threads: \d+)\s+" - "Total (Memory:\s*.*)\s+" - "GPU Model (Name: .*)?\s+" - "GPU (Memory: .*)?", baselineContent) - if cpuInfo is None: - return - self.cpuInfo = "\n".join(cpuInfo.groups()[0:4]) - hwInfo = cpuInfo.groups()[4:] - startGpuInfoIndex = baselineContent.find("GPU info: ") - endGpuInfoIndex = baselineContent.find("----------", startGpuInfoIndex) - gpuInfo = re.findall("\t\t(Device ID: \d+)\s+" - "(Compute Capability: \d\.\d)\s+" - "(CUDA cores: \d+)", baselineContent[startGpuInfoIndex:endGpuInfoIndex]) - if not gpuInfo: + startHardwareInfoIndex = baselineContent.find("Hardware info:") + endHardwareInfoIndex = baselineContent.find("----------", startHardwareInfoIndex) + hwInfo = re.search("^Hardware info:\s+" + "CPU Model (Name:\s*.*)\s+" + "(Hardware threads: \d+)\s+" + "Total (Memory:\s*.*)\s+" + "GPU Model (Name: .*)?\s+" + "GPU (Memory: .*)?", baselineContent[startHardwareInfoIndex:endHardwareInfoIndex], re.MULTILINE) + if hwInfo is None: return - for index in range(0, len(gpuInfo)): - hwInfo = hwInfo + gpuInfo[index] - self.gpuInfo = "\n".join(hwInfo) + self.cpuInfo = "\n".join(hwInfo.groups()[:3]) + gpuInfo = hwInfo.groups()[3:] + + startGpuInfoIndex = baselineContent.find("GPU info:") + endGpuInfoIndex = baselineContent.find("----------", startGpuInfoIndex) + gpuCapability = re.findall("\t\t(Device ID: \d+)\s+[\w/: \t]+" + "(Compute Capability: \d\.\d)\s+[\w/: \t]+" + "(CUDA cores: \d+)", 
baselineContent[startGpuInfoIndex:endGpuInfoIndex]) + if not gpuCapability: + return + for index in range(0, len(gpuCapability)): + gpuInfo = gpuInfo + gpuCapability[index] + self.gpuInfo = "\n".join(gpuInfo) @staticmethod def getLastTestResult(line): @@ -107,15 +99,6 @@ class Example: example = Example(suiteName, exampleName, testDir) Example.allExamplesIndexedByFullName[example.fullName.lower()] = example - # Finds a location of a baseline file by probing different names in the following order: - # baseline.$os.$flavor.$device.txt - # baseline.$os.$flavor.txt - # baseline.$os.$device.txt - # baseline.$os.txt - # baseline.$flavor.$device.txt - # baseline.$flavor.txt - # baseline.$device.txt - # baseline.txt def findBaselineFilesList(self): baselineFilesList = [] @@ -127,7 +110,7 @@ class Example: for device in devices: for flavor in flavors: candidateName = "baseline" + o + flavor + device + ".txt" - fullPath = cygpath(os.path.join(self.testDir, candidateName), relative=True) + fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True) if os.path.isfile(fullPath): baseline = Baseline(fullPath, o[1:], device[1:], flavor[1:]); baselineFilesList.append(baseline) @@ -144,10 +127,10 @@ def getExamplesMetrics(): for example in allExamples: baselineListForExample = example.findBaselineFilesList() six.print_("Example: " + example.fullName) - for baseline in baselineListForExample: + for baseline in baselineListForExample: with open(baseline.fullPath, "r") as f: baselineContent = f.read() - gitHash = re.search('.*Build SHA1:\s([a-z0-9]{40})\s', baselineContent) + gitHash = re.search('.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', baselineContent, re.MULTILINE) if gitHash is None: continue example.gitHash = gitHash.group(1) @@ -176,7 +159,8 @@ def writeMetricsToAsciidoc(): metricsFile.write("|====\n") metricsFile.write("|Log file / Configuration | Train Result | Test Result\n") for baseline in example.baselineList: - 
metricsFile.write("".join(["|link:../blob/master/Tests/EndToEndTests/", baseline.fullPath.split(thisDir)[1][1:], "[", + pathInDir=baseline.fullPath.split(thisDir)[1][1:] + metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[", baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|", baseline.testResult.replace("\n", " "), "|\n"])) cpuInfo = "".join(["CPU: ", re.sub("[\r]?\n", ' ', baseline.cpuInfo)]) diff --git a/Tests/EndToEndTests/TestDriver.py b/Tests/EndToEndTests/TestDriver.py index 2484808bd..edc795205 100755 --- a/Tests/EndToEndTests/TestDriver.py +++ b/Tests/EndToEndTests/TestDriver.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python # ---------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- @@ -687,89 +687,90 @@ def runCommand(args): sys.exit(10) # ======================= Entry point ======================= -parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver") -subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py --help for command-specific help") -runSubparser = subparsers.add_parser("run", help="run test(s)") -runSubparser.add_argument("test", nargs="*", - help="optional test name(s) to run, specified as Suite/TestName. " - "Use list command to list available tests. " - "If not specified then all tests will be run.") +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="TestDriver - CNTK Test Driver") + subparsers = parser.add_subparsers(help="command to execute. Run TestDriver.py --help for command-specific help") + runSubparser = subparsers.add_parser("run", help="run test(s)") + runSubparser.add_argument("test", nargs="*", + help="optional test name(s) to run, specified as Suite/TestName. " + "Use list command to list available tests. 
" + "If not specified then all tests will be run.") -defaultBuildSKU = "gpu" + defaultBuildSKU = "gpu" -runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run") -runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag") -runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device") -runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor") -runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - run tests only for a specified build SKU") -tmpDir = os.getenv("TEMP") if windows else "/tmp" -defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000))) -runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp") -runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them") -runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline...txt) for tests that do not currently have baselines") -runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script") -runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines") + runSubparser.add_argument("-b", "--build-location", help="location of the CNTK build to run") + runSubparser.add_argument("-t", "--tag", help="runs tests which match the specified tag") + runSubparser.add_argument("-d", "--device", help="cpu|gpu - run on a specified device") + runSubparser.add_argument("-f", "--flavor", help="release|debug - run only a specified flavor") + runSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, 
help="cpu|gpu|1bitsgd - run tests only for a specified build SKU") + tmpDir = os.getenv("TEMP") if windows else "/tmp" + defaultRunDir=os.path.join(tmpDir, "cntk-test-{0}.{1}".format(time.strftime("%Y%m%d%H%M%S"), random.randint(0,1000000))) + runSubparser.add_argument("-r", "--run-dir", default=defaultRunDir, help="directory where to store test output, default: a random dir within /tmp") + runSubparser.add_argument("--update-baseline", action='store_true', help="update baseline file(s) instead of matching them") + runSubparser.add_argument("--create-baseline", action='store_true', help="create new baseline file(s) (named as baseline...txt) for tests that do not currently have baselines") + runSubparser.add_argument("-v", "--verbose", action='store_true', help="verbose output - dump all output of test script") + runSubparser.add_argument("-n", "--dry-run", action='store_true', help="do not run the tests, only print test names and configurations to be run along with full command lines") -runSubparser.set_defaults(func=runCommand) + runSubparser.set_defaults(func=runCommand) -listSubparser = subparsers.add_parser("list", help="list available tests") -listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag") -listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device") -listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor") -listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU") -listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system") + listSubparser = subparsers.add_parser("list", help="list available tests") + listSubparser.add_argument("-t", "--tag", help="limits a resulting list to tests matching the specified tag") + listSubparser.add_argument("-d", "--device", help="cpu|gpu - tests for a specified device") + 
listSubparser.add_argument("-f", "--flavor", help="release|debug - tests for specified flavor") + listSubparser.add_argument("-s", "--build-sku", default=defaultBuildSKU, help="cpu|gpu|1bitsgd - list tests only for a specified build SKU") + listSubparser.add_argument("--os", help="windows|linux - tests for a specified operating system") -listSubparser.set_defaults(func=listCommand) + listSubparser.set_defaults(func=listCommand) -if len(sys.argv)==1: - parser.print_help() - sys.exit(1) - -args = parser.parse_args(sys.argv[1:]) - -# parsing a --device, --flavor and --os options: -args.devices = ["cpu", "gpu"] -if (args.device): - args.device = args.device.lower() - if not args.device in args.devices: - six.print_("--device must be one of", args.devices, file=sys.stderr) - sys.exit(1) - args.devices = [args.device] - -args.flavors = ["debug", "release"] -if (args.flavor): - args.flavor = args.flavor.lower() - if not args.flavor in args.flavors: - six.print_("--flavor must be one of", args.flavors, file=sys.stderr) - sys.exit(1) - args.flavors = [args.flavor] - -args.buildSKUs = ["cpu", "gpu", "1bitsgd"] -if (args.build_sku): - args.build_sku = args.build_sku.lower() - if not args.build_sku in args.buildSKUs: - six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr) - sys.exit(1) - args.buildSKUs = [args.build_sku] - if args.build_sku == "cpu" and args.devices == ["gpu"]: - print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu" - sys.exit(1) - -if args.func == runCommand and not args.build_location: - args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/")) - -if args.func == listCommand: - args.oses = ["windows", "linux"] - if (args.os): - args.os = args.os.lower() - if not args.os in args.oses: - six.print_("--os must be one of", args.oses, file=sys.stderr) + if len(sys.argv)==1: + parser.print_help() sys.exit(1) - args.oses = [args.os] -# discover all the tests 
-Test.discoverAllTests() + args = parser.parse_args(sys.argv[1:]) -# execute the command -args.func(args) + # parsing a --device, --flavor and --os options: + args.devices = ["cpu", "gpu"] + if (args.device): + args.device = args.device.lower() + if not args.device in args.devices: + six.print_("--device must be one of", args.devices, file=sys.stderr) + sys.exit(1) + args.devices = [args.device] + + args.flavors = ["debug", "release"] + if (args.flavor): + args.flavor = args.flavor.lower() + if not args.flavor in args.flavors: + six.print_("--flavor must be one of", args.flavors, file=sys.stderr) + sys.exit(1) + args.flavors = [args.flavor] + + args.buildSKUs = ["cpu", "gpu", "1bitsgd"] + if (args.build_sku): + args.build_sku = args.build_sku.lower() + if not args.build_sku in args.buildSKUs: + six.print_("--build-sku must be one of", args.buildSKUs, file=sys.stderr) + sys.exit(1) + args.buildSKUs = [args.build_sku] + if args.build_sku == "cpu" and args.devices == ["gpu"]: + print >>sys.stderr, "Invalid combination: --build-sku cpu and --device gpu" + sys.exit(1) + + if args.func == runCommand and not args.build_location: + args.build_location = os.path.realpath(os.path.join(thisDir, "../..", "x64" if windows else "build/")) + + if args.func == listCommand: + args.oses = ["windows", "linux"] + if (args.os): + args.os = args.os.lower() + if not args.os in args.oses: + six.print_("--os must be one of", args.oses, file=sys.stderr) + sys.exit(1) + args.oses = [args.os] + + # discover all the tests + Test.discoverAllTests() + + # execute the command + args.func(args) diff --git a/Tests/EndToEndTests/run-test-common b/Tests/EndToEndTests/run-test-common index 775329a09..23cc9d72e 100755 --- a/Tests/EndToEndTests/run-test-common +++ b/Tests/EndToEndTests/run-test-common @@ -28,10 +28,25 @@ DeleteModelsAfterTest=1 printHardwareInfo() { - cpuName=$(cat /proc/cpuinfo | grep -m 1 'model name' | cut -d ":" -f 2 | tr -s " ") - cpuCores=$(cat /proc/cpuinfo | grep -m 1 'cpu 
cores' | cut -d ":" -f 2 | tr -s " ") - totalMemory=$(cat /proc/meminfo | grep 'MemTotal' | cut -d ":" -f 2 | tr -s " ") + cpuName=$(cat /proc/cpuinfo 2> /dev/null | grep -m 1 'model name' | cut -d : -f 2- | tr -s " " | cut -c 2-) + totalMemory=$(cat /proc/meminfo 2> /dev/null | grep 'MemTotal' | cut -d : -f 2- | tr -s " " | cut -c 2-) nproc=$(nproc) + + # Note that MetricsDriver.py depends on this format + echo "Hardware info:" + echo " CPU Model Name: $cpuName" + echo " Hardware threads: $nproc" + echo " Total Memory: $totalMemory" + + nvidiaSmiPath="" + + if [ "$OS" == "Windows_NT" ]; then + nvidiaSmiPath="/cygdrive/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi" + else + nvidiaSmiPath="nvidia-smi" + fi + + if [ -f "$nvidiaSmiPath" ]; then gpuName="" gpuMem="" @@ -39,25 +54,19 @@ printHardwareInfo() gpuMemQuery="--query-gpu=memory.total --format=csv,noheader" if [ "$OS" == "Windows_NT" ]; then - nvidiaSmiPath="/cygdrive/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi" - if [ -f "$nvidiaSmiPath" ]; then gpuName=$("$nvidiaSmiPath" $gpuNameQuery | head -1) gpuMem=$("$nvidiaSmiPath" $gpuMemQuery | head -1) - fi else - gpuName=$(nvidia-smi $gpuNameQuery | head -1) - gpuMem=$(nvidia-smi $gpuMemQuery | head -1) + gpuName=$($nvidiaSmiPath $gpuNameQuery | head -1) + gpuMem=$($nvidiaSmiPath $gpuMemQuery | head -1) fi - echo -e "Hardware info: - CPU Model Name:$cpuName - CPU Cores:$cpuCores - Hardware threads: $nproc - Total Memory:$totalMemory" if [ ! -z "$gpuName" ]; then - echo -e " GPU Model Name: $gpuName - GPU Memory: $gpuMem" + echo " GPU Model Name: $gpuName" + echo " GPU Memory: $gpuMem" + fi fi + echo "-------------------------------------------------------------------" } # Helper function to print and run a command