MBLayout support for "write" command;

added sample to simple2d for "write"
This commit is contained in:
Frank Seide 2016-02-26 17:27:27 -08:00
Родитель 1a618bc708
Коммит 1cc857de0f
3 изменённых файлов: 112 добавлений и 69 удалений

Просмотреть файл

@ -5,14 +5,14 @@
RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
ModelDir = "$OutputDir$/Models"
# deviceId=-1 for CPU, >=0 for GPU devices, "auto" chooses the best GPU, or CPU if no usable GPU is available
deviceId = -1
command = Simple_Demo_Train:Simple_Demo_Test
command = Simple_Demo_Train:Simple_Demo_Test:Simple_Demo_Output
precision = "float"
traceLevel = 1
@ -129,6 +129,25 @@ Simple_Demo_Output=[
labelMappingFile = "$DataDir$/SimpleMapping.txt"
]
]
outputPath = "$OutputDir$/SimpleOutput" # Dump output as text
outputNodeNames = PosteriorProb : labels # Select the nodes to print. If this is not given, the nodes tagged as "output" are printed instead.
outputPath = "$OutputDir$/SimpleOutput" # Path to write to. ".NODENAME" will be appended.
#outputPath = "-" # As an alternative, this prints to stdout.
# extra formatting options
# This is configured to print the classified and ground-truth labels.
# To write out the actual posterior probabilities, comment out this section.
format = [
type = "category" # This finds the highest-scoring entry and prints its index.
labelMappingFile = "$DataDir$/SimpleMapping.txt" # Specifying this as well will translate the index into a string.
sequenceEpilogue = "\t// %s\n" # Append this on every line, %s = node name. Useful when printing to stdout.
# How to implement simple error counting with this (cd into $OutputDir$ first):
# grep PosteriorProb SimpleOutput.PosteriorProb | awk '{print $1}' > P
# grep labels SimpleOutput.labels | awk '{print $1}' > L
# diff L P | grep "<" | wc -l
# wc -l P
# The ratio of the two numbers gives the same error rate as ErrorPrediction/Sample in the log.
]
]

Просмотреть файл

@ -232,7 +232,6 @@ public:
// get it (into a flat CPU-side vector)
Matrix<ElemType>& outputValues = dynamic_pointer_cast<ComputationNode<ElemType>>(onode)->Value();
outputValues.CopyToArray(tempArray, tempArraySize);
ElemType* pCurValue = tempArray;
// sequence separator
FILE * f = *outputStreams[onode];
@ -242,77 +241,92 @@ public:
const auto elementSeparator = formattingOptions.Processed(onode->NodeName(), formattingOptions.elementSeparator);
const auto sampleSeparator = formattingOptions.Processed(onode->NodeName(), formattingOptions.sampleSeparator);
if (numMBsRun > 0 && !sequenceSeparator.empty())
fprintfOrDie(f, "%s", sequenceSeparator.c_str());
fprintfOrDie(f, "%s", sequencePrologue.c_str());
// output it according to our format specification
size_t T = outputValues.GetNumCols();
size_t dim = outputValues.GetNumRows();
if (formattingOptions.isCategoryLabel)
// process all sequences one by one
auto pMBLayout = onode->GetMBLayout();
const auto& sequences = pMBLayout->GetAllSequences();
size_t colStride = pMBLayout->GetNumParallelSequences() * outputValues.GetNumRows(); // how to get from one column to the next
size_t width = pMBLayout->GetNumTimeSteps();
for (size_t s = 0; s < sequences.size(); s++)
{
if (formatChar == 's') // verify label dimension
const auto& seqInfo = sequences[s];
size_t tBegin = seqInfo.tBegin >= 0 ? seqInfo.tBegin : 0;
size_t tEnd = seqInfo.tEnd <= width ? seqInfo.tEnd : width;
// the current sequence is a matrix whose column stride is 'colStride', starting at the following pointer
ElemType* pCurValue = tempArray + s * outputValues.GetNumRows() + seqInfo.tBegin;
if ((numMBsRun > 0 || s > 0) && !sequenceSeparator.empty())
fprintfOrDie(f, "%s", sequenceSeparator.c_str());
fprintfOrDie(f, "%s", sequencePrologue.c_str());
// output it according to our format specification
size_t dim = outputValues.GetNumRows();
size_t T = tEnd - tBegin;
if (formattingOptions.isCategoryLabel)
{
if (dim != labelMapping.size())
InvalidArgument("write: Row dimension %d does not match number of entries %d in labelMappingFile '%ls'", (int)dim, (int)labelMapping.size(), formattingOptions.labelMappingFile.c_str());
}
// update the matrix in-place from one-hot (or max) to index
// find the max in each column
foreach_column(j, outputValues)
{
double maxPos = -1;
double maxVal = 0;
foreach_row(i, outputValues)
if (formatChar == 's') // verify label dimension
{
double val = pCurValue[i + j * dim];
if (maxPos < 0 || val >= maxVal)
if (outputValues.GetNumRows() != labelMapping.size())
InvalidArgument("write: Row dimension %d does not match number of entries %d in labelMappingFile '%ls'", (int)dim, (int)labelMapping.size(), formattingOptions.labelMappingFile.c_str());
}
// update the matrix in-place from one-hot (or max) to index
// find the max in each column
for (size_t j = 0; j < T; j++)
{
double maxPos = -1;
double maxVal = 0;
for (size_t i = 0; i < dim; i++)
{
maxPos = (double)i;
maxVal = val;
double val = pCurValue[i + j * dim * colStride];
if (maxPos < 0 || val >= maxVal)
{
maxPos = (double)i;
maxVal = val;
}
}
pCurValue[0 + j * colStride] = (ElemType)maxPos; // overwrite first element in-place
}
dim = 1; // ignore remaining dimensions
}
size_t iend = formattingOptions.transpose ? dim : T;
size_t jend = formattingOptions.transpose ? T : dim;
size_t istride = formattingOptions.transpose ? 1 : colStride;
size_t jstride = formattingOptions.transpose ? colStride : 1;
for (size_t j = 0; j < jend; j++)
{
if (j > 0)
fprintfOrDie(f, "%s", sampleSeparator.c_str());
for (size_t i = 0; i < iend; i++)
{
if (i > 0)
fprintfOrDie(f, "%s", elementSeparator.c_str());
if (formatChar == 'f') // print as real number
{
double dval = pCurValue[i * istride + j * jstride];
fprintfOrDie(f, valueFormatString.c_str(), dval);
}
else if (formatChar == 'u') // print category as integer index
{
unsigned int uval = (unsigned int) pCurValue[i * istride + j * jstride];
fprintfOrDie(f, valueFormatString.c_str(), uval);
}
else if (formatChar == 's') // print category as a label string
{
size_t uval = (size_t) pCurValue[i * istride + j * jstride];
assert(uval < labelMapping.size());
const char * sval = labelMapping[uval].c_str();
fprintfOrDie(f, valueFormatString.c_str(), sval);
}
}
pCurValue[j] = (ElemType) maxPos; // overwrite in-place, assuming a flat vector
}
dim = 1;
}
size_t iend = formattingOptions.transpose ? dim : T;
size_t jend = formattingOptions.transpose ? T : dim;
size_t istride = formattingOptions.transpose ? 1 : jend;
size_t jstride = formattingOptions.transpose ? iend : 1;
for (size_t j = 0; j < jend; j++)
{
if (j > 0)
fprintfOrDie(f, "%s", sampleSeparator.c_str());
for (size_t i = 0; i < iend; i++)
{
if (i > 0)
fprintfOrDie(f, "%s", elementSeparator.c_str());
if (formatChar == 'f') // print as real number
{
double dval = pCurValue[i * istride + j * jstride];
fprintfOrDie(f, valueFormatString.c_str(), dval);
}
else if (formatChar == 'u') // print category as integer index
{
unsigned int uval = (unsigned int) pCurValue[i * istride + j * jstride];
fprintfOrDie(f, valueFormatString.c_str(), uval);
}
else if (formatChar == 's') // print category as a label string
{
size_t uval = (size_t) pCurValue[i * istride + j * jstride];
assert(uval < labelMapping.size());
const char * sval = labelMapping[uval].c_str();
fprintfOrDie(f, valueFormatString.c_str(), sval);
}
}
}
fprintfOrDie(f, "%s", sequenceEpilogue.c_str());
}
fprintfOrDie(f, "%s", sequenceEpilogue.c_str());
} // end loop over sequences
} // end loop over nodes
totalEpochSamples += actualMBSize;
fprintf(stderr, "Minibatch[%lu]: ActualMBSize = %lu\n", ++numMBsRun, actualMBSize);
}
} // end loop over minibatches
for (auto & onode : outputNodes)
{
@ -322,7 +336,7 @@ public:
delete[] tempArray;
fprintf(stderr, "Total Samples Evaluated = %lu\n", totalEpochSamples);
fprintf(stderr, "Written to %ls*\nTotal Samples Evaluated = %lu\n", outputPath.c_str(), totalEpochSamples);
// flush all files (where we can catch errors) so that we can then destruct the handle cleanly without error
for (auto & iter : outputStreams)

Просмотреть файл

@ -61,9 +61,19 @@ COMMAND: currentDirectory=$(SolutionDir)ExampleSetups\Image\MNIST configFil
--- Image/QuickE2E:
COMMAND: configFile=$(SolutionDir)Tests\EndToEndTests\Image\QuickE2E\cntk.cntk RunDir=$(SolutionDir)Tests\EndToEndTests\Image\_run DataDir=$(SolutionDir)Tests\EndToEndTests\Image\Data ConfigDir=$(SolutionDir)Tests\EndToEndTests\Image\QuickE2E stderr=$(SolutionDir)Tests\EndToEndTests\RunDir\Image\QuickE2E\models\cntkImage.dnn.log DeviceId=0 useCuDnn=false makeMode=false
COMMAND: configFile=$(SolutionDir)Tests/EndToEndTests/Image/QuickE2E/cntk.cntk RunDir=$(SolutionDir)Tests/EndToEndTests/Image/_run DataDir=$(SolutionDir)Tests/EndToEndTests/Image/Data ConfigDir=$(SolutionDir)Tests/EndToEndTests/Image/QuickE2E stderr=$(SolutionDir)Tests/EndToEndTests/RunDir/Image/QuickE2E/models/cntkImage.dnn.log DeviceId=0 useCuDnn=false makeMode=false
--- Other/Simple2d:
Examples/Other/Simple2d /README.md
Examples\Other\Simple2d\Config\Simple.cntk
COMMAND: configFile=$(SolutionDir)Examples/Other/Simple2d/Config/Simple.cntk RunDir=$(SolutionDir)Examples/Other/Simple2d/_run DataDir=$(SolutionDir)Examples/Other/Simple2d/Data ConfigDir=$(SolutionDir)Examples/Other/Simple2d/Config stderr=$(SolutionDir)Examples/Other/Simple2d/_run/Simple.log DeviceId=0 useCuDnn=false makeMode=false
cntk configFile=Config/Simple.cntk currentDirectory=Data
Simple test
-----------
COMMAND: currentDirectory=$(SolutionDir)Demos\Simple configFile=Simple.cntk stderr=RunDir\Simple.cntk.log RootDir=$(SolutionDir) DeviceNumber=-1
COMMAND: currentDirectory=$(SolutionDir)Demos/Simple configFile=Simple.cntk stderr=RunDir/Simple.cntk.log RootDir=$(SolutionDir) DeviceNumber=-1