190 строки
7.6 KiB
Python
190 строки
7.6 KiB
Python
#!/usr/bin/env python
|
|
# ----------------------------------------------------------
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
# Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
|
|
# ---------------------------------------------------------
|
|
# This script extracts information (hardware used, final results) contained in the baseline files
# and generates an AsciiDoc file (metrics.adoc) that can be used as a wiki page
|
|
|
|
import sys, os, re
|
|
import TestDriver as td
|
|
|
|
# 'six' supplies the print helper (six.print_) used throughout this script.
# Stop immediately with an actionable message when it is not installed.
try:
    import six
except ImportError:
    print("Python package 'six' not installed. Please run 'pip install six'.")
    sys.exit(1)

# Directory that contains this script (after os.path.realpath resolution).
thisDir = os.path.dirname(os.path.realpath(__file__))

# True when the OS environment variable equals "Windows_NT".
windows = os.environ.get("OS") == "Windows_NT"
|
|
|
|
class Baseline:
    """Hardware and result information extracted from one baseline log file.

    Attributes:
        fullPath: path of the baseline file this object describes.
        cpuInfo: newline-separated CPU description, "" until extracted.
        gpuInfo: newline-separated GPU device lines, "" until extracted.
        testResult: formatted last "Final Results" line, "" if none found.
        trainResult: formatted last "Finished Epoch" line, "" if none found.
    """

    # Run of dashes used to locate the end of a hardware-info section.
    _SECTION_END = "----------"

    def __init__(self, fullPath, testResult = "", trainResult = ""):
        self.fullPath = fullPath
        self.cpuInfo = ""
        self.gpuInfo = ""
        self.testResult = testResult
        self.trainResult = trainResult

    def extractResultsInfo(self, baselineContent):
        """Store the last training and test result lines of baselineContent.

        Lines of interest look like:
            Finished Epoch[ 5 of 5]: [Training] ce = 2.32253198 * 1000 err = 0.90000000 * 1000 ...
            Final Results: Minibatch[1-1]: err = 0.90000000 * 100 ce = 2.32170486 * 100 ...

        Only the last match of each kind is kept. When a kind is absent the
        corresponding attribute is left unchanged.
        """
        # Raw strings keep the regex escapes (\[, \d, \s) away from Python's
        # own string-escape processing.
        trainResults = re.findall(r'.*(Finished Epoch\[ *\d+ of \d+\]\: \[Training\]) (.*)', baselineContent)
        if trainResults:
            # NOTE(review): [0:-2] drops the final two characters of the
            # formatted text; presumably trailing residue - confirm.
            self.trainResult = Baseline.formatLastTrainResult(trainResults[-1])[0:-2]

        testResults = re.findall(r'.*(Final Results: Minibatch\[1-\d+\]:)(\s+\* \d+)?\s+(.*)', baselineContent)
        if testResults:
            self.testResult = Baseline.formatLastTestResult(testResults[-1])[0:-2]

    @staticmethod
    def _sectionText(baselineContent, header):
        """Return the text from header up to the next run of dashes.

        Returns "" when header does not occur. When no terminating dashes
        follow, the section runs to the end of baselineContent, so the last
        character of the section is not lost.
        """
        start = baselineContent.find(header)
        if start == -1:
            return ""
        end = baselineContent.find(Baseline._SECTION_END, start)
        if end == -1:
            end = len(baselineContent)
        return baselineContent[start:end]

    def extractHardwareInfo(self, baselineContent):
        """Fill cpuInfo and gpuInfo from the hardware sections of a baseline.

        Expected input, e.g.:
            CPU info:
                CPU Model Name: Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz
                Hardware threads: 12
                Total Memory: 33474872 kB
            GPU info:

                Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB
                Device[1]: cores = 96; computeCapability = 2.1; type = "Quadro 600"; memory = 1024 MB

        If the CPU section cannot be parsed nothing is changed and the GPU
        section is not examined. If no GPU device line is found, gpuInfo is
        left unchanged.
        """
        cpuInfo = re.search(r"^CPU info:\s+"
                            r"CPU Model (Name:\s*.*)\s+"
                            r"(Hardware threads: \d+)\s+"
                            r"Total (Memory:\s*.*)\s+",
                            Baseline._sectionText(baselineContent, "CPU info:"),
                            re.MULTILINE)
        if cpuInfo is None:
            return
        self.cpuInfo = "\n".join(cpuInfo.groups())

        # Device lines are only recognised when preceded by two tab characters.
        gpuDevices = re.findall(r"\t\t(Device\[\d+\]: cores = \d+; computeCapability = \d\.\d; type = .*; memory = \d+ MB)[\r\n]?",
                                Baseline._sectionText(baselineContent, "GPU info:"))
        if not gpuDevices:
            return
        self.gpuInfo = "\n".join(gpuDevices)

    @staticmethod
    def formatLastTestResult(line):
        """Format one "Final Results" match as newline-separated text.

        line is a 3-item sequence: the "Final Results: Minibatch[...]:" header,
        the optional " * N" fragment ("" when absent) and the metrics text.
        """
        # NOTE(review): the second replace turns every single space into a
        # newline; confirm that a single space is the intended separator.
        return line[0] + line[1] + "\n" + line[2].replace('; ', '\n').replace(' ','\n')

    @staticmethod
    def formatLastTrainResult(line):
        """Format one "Finished Epoch" match as newline-separated text.

        line is a 2-item sequence: the epoch header and the parameters text,
        whose '; ' separators are replaced by newlines.
        """
        epochsInfo, parameters = line[0], line[1]
        return epochsInfo + '\n' + parameters.replace('; ', '\n')
|
|
|
|
class Example:
    """One test example (a directory holding 'testcases.yml') and its baselines."""

    # Registry filled by discoverAllExamples(), keyed by lower-cased fullName.
    allExamplesIndexedByFullName = {}

    def __init__(self, suite, name, testDir):
        self.suite, self.name, self.testDir = suite, name, testDir
        self.fullName = suite + "/" + name
        self.baselineList = []

        self.gitHash = ""

    @staticmethod
    def discoverAllExamples():
        """Walk thisDir and register every directory containing 'testcases.yml'."""
        testsDir = thisDir
        for dirName, _, fileList in os.walk(testsDir):
            if 'testcases.yml' not in fileList:
                continue
            # The directory name is the example name; its parent, expressed
            # relative to testsDir with forward slashes, is the suite name.
            suiteDir, exampleName = os.path.split(dirName)
            suiteName = os.path.relpath(suiteDir, testsDir).replace('\\', '/')

            example = Example(suiteName, exampleName, dirName)
            Example.allExamplesIndexedByFullName[example.fullName.lower()] = example

    def findBaselineFilesList(self):
        """Return a Baseline for every existing baseline file of this example.

        Candidate names have the form baseline[.os][.flavor][.device].txt and
        are probed with the OS suffix varying slowest and the flavor fastest.
        """
        candidateNames = [
            "baseline" + osSuffix + flavorSuffix + deviceSuffix + ".txt"
            for osSuffix in (".windows", ".linux", "")
            for deviceSuffix in (".cpu", ".gpu", "")
            for flavorSuffix in (".debug", ".release", "")
        ]

        found = []
        for candidateName in candidateNames:
            fullPath = td.cygpath(os.path.join(self.testDir, candidateName), relative=True)
            if os.path.isfile(fullPath):
                found.append(Baseline(fullPath))
        return found
|
|
|
|
# extracts information for every example and stores it in Example.allExamplesIndexedByFullName
|
|
def getExamplesMetrics():
    """Collect git hash, hardware and result info for every discovered example.

    Rebinds Example.allExamplesIndexedByFullName to a list of the examples
    sorted by fullName (the other functions in this file iterate over that
    list). For each example, every baseline file that contains a
    'Build SHA1:' line is parsed and stored in example.baselineList;
    baselines without such a line are skipped. example.gitHash ends up with
    the hash of the last baseline that had one.

    Calling the function again re-sorts the existing list and rebuilds each
    baselineList, instead of failing on the already-converted registry or
    duplicating entries.
    """
    registry = Example.allExamplesIndexedByFullName
    # The registry starts out as a dict and becomes a list after the first call.
    if isinstance(registry, dict):
        registry = registry.values()
    allExamples = sorted(registry, key=lambda test: test.fullName)
    Example.allExamplesIndexedByFullName = allExamples

    six.print_("CNTK - Metrics collector")

    # Raw string so that \s reaches the regex engine untouched; compiled once
    # because the same pattern is applied to every baseline.
    buildHashPattern = re.compile(r'.*Build SHA1:\s([a-z0-9]{40})[\r\n]+', re.MULTILINE)

    for example in allExamples:
        baselineListForExample = example.findBaselineFilesList()
        six.print_("Example: " + example.fullName)

        parsedBaselines = []
        for baseline in baselineListForExample:
            with open(baseline.fullPath, "r") as f:
                baselineContent = f.read()

            gitHash = buildHashPattern.search(baselineContent)
            if gitHash is None:
                continue
            example.gitHash = gitHash.group(1)
            baseline.extractHardwareInfo(baselineContent)
            baseline.extractResultsInfo(baselineContent)
            parsedBaselines.append(baseline)
        example.baselineList = parsedBaselines
|
|
|
|
# creates a list with links to each example result
|
|
def createAsciidocExampleList(file):
    """Write an AsciiDoc cross-reference line for each example with baselines.

    file is any object with a write() method. Examples whose baselineList is
    empty are left out. The anchor is the lower-cased fullName with '/'
    removed. A blank line is written after the list.
    """
    for example in Example.allExamplesIndexedByFullName:
        if example.baselineList:
            anchor = example.fullName.replace("/","").lower()
            file.write("<<" + anchor + "," + example.fullName + ">> +\n")
    file.write("\n")
|
|
|
|
def writeMetricsToAsciidoc():
    """Write the collected metrics to 'metrics.adoc' in the current directory.

    The file starts with the list of links produced by
    createAsciidocExampleList, followed by one three-column table per example
    that has baselines. Each baseline contributes a link to its log file, its
    train and test results, and a line with its CPU (and, if known, GPU) info.
    """
    # All writes below pass str. On Python 3 a binary-mode file rejects str,
    # so open in text mode there; newline="\n" keeps the "\n" line endings
    # that binary mode produced. Python 2 keeps the original binary mode.
    if six.PY3:
        metricsFile = open("metrics.adoc", "w", newline="\n")
    else:
        metricsFile = open("metrics.adoc", 'wb')

    # The with-block guarantees the file is flushed and closed.
    with metricsFile:
        createAsciidocExampleList(metricsFile)

        for example in Example.allExamplesIndexedByFullName:
            if not example.baselineList:
                continue
            metricsFile.write("".join(["===== ", example.fullName, "\n"]))
            metricsFile.write("".join(["**Git Hash: **", example.gitHash, "\n\n"]))
            metricsFile.write("[cols=3, options=\"header\"]\n")
            metricsFile.write("|====\n")
            metricsFile.write("|Log file / Configuration | Train Result | Test Result\n")
            for baseline in example.baselineList:
                # NOTE(review): assumes baseline.fullPath contains thisDir;
                # otherwise the [1] index raises IndexError - confirm against
                # what td.cygpath(..., relative=True) returns.
                pathInDir = baseline.fullPath.split(thisDir)[1][1:]
                metricsFile.write("".join(["|link:../blob/", example.gitHash[:7],"/Tests/EndToEndTests/", pathInDir, "[",
                                           baseline.fullPath.split("/")[-1], "] .2+|", baseline.trainResult.replace("\n", " "), " .2+|",
                                           baseline.testResult.replace("\n", " "), "|\n"]))

                # Hardware info is flattened onto a single line.
                cpuInfo = "".join(["CPU: ", re.sub("[\r]?\n", ' ', baseline.cpuInfo)])
                gpuInfo = re.sub("[\r]?\n", ' ', baseline.gpuInfo)
                if gpuInfo:
                    metricsFile.write("".join([cpuInfo, " GPU: ", gpuInfo]))
                else:
                    metricsFile.write(cpuInfo)

            metricsFile.write("\n|====\n\n")
|
|
|
|
# ======================= Entry point =======================
|
|
def _collectAndWriteMetrics():
    """Run the stages in order: discover examples, gather metrics, write the report."""
    Example.discoverAllExamples()
    getExamplesMetrics()
    writeMetricsToAsciidoc()


# Runs whenever this module is executed or imported.
six.print_("==============================================================================")
_collectAndWriteMetrics()
|