merged from master and resolved conflicts

This commit is contained in:
Frank Seide 2016-04-16 14:29:11 -07:00
Parent 0f1ce6cd98 e87b4d6efd
Commit 4c9f91868e
69 changed files with 12572 additions and 346 deletions

.gitignore (vendored, 4 changes)
View file

@@ -150,6 +150,10 @@ GeneratedArtifacts/
_Pvt_Extensions/
ModelManifest.xml
# Python
*.pyc
__pycache__/
# =========================
# Windows detritus
# =========================

View file

@@ -913,6 +913,58 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NetworkTests", "Tests\UnitT
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Text", "Text", "{8656B71D-E24C-4AC2-8BE4-C07B415A3E15}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceClassification", "SequenceClassification", "{E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{8629430A-821E-43BA-AEC5-8B2CF31A2A7A}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{0141526B-F257-4574-8CBE-99634726FFCE}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_Convolution", "01_Convolution", "{58286327-6742-44C4-A34E-D2583419E55E}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_BatchNormConv", "02_BatchNormConv", "{AB9207B9-B134-4C57-B7ED-F3DCF7B0DC5F}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ResNet", "03_ResNet", "{12FB912C-43F8-40FE-BD7F-B52F589A1EBC}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\run-test
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "04_ResNet", "04_ResNet", "{2BFE4D88-6F32-4701-887A-1DE3D7626DBB}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.linux.gpu.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.windows.txt
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\run-test
Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{EC780385-7580-4D15-914B-1D878A295CBC}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk = Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt
Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug_CpuOnly|x64 = Debug_CpuOnly|x64
@@ -1254,5 +1306,15 @@ Global
{48C2A9DE-FB2C-4724-9ADC-744216D79BCF} = {08A05A9A-4E45-42D5-83FA-719E99C04A30}
{2B1046A1-0140-43B7-B3DC-CF7DEEE1009E} = {8071EF60-30F7-4A77-81AA-ADCA0E18B1E3}
{CDA96AA3-3252-4978-A0BF-2ACD670823CB} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
{8656B71D-E24C-4AC2-8BE4-C07B415A3E15} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
{E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
{8629430A-821E-43BA-AEC5-8B2CF31A2A7A} = {FC7E7EC7-6E6A-4518-81C6-DA60451C657A}
{0141526B-F257-4574-8CBE-99634726FFCE} = {8629430A-821E-43BA-AEC5-8B2CF31A2A7A}
{58286327-6742-44C4-A34E-D2583419E55E} = {0141526B-F257-4574-8CBE-99634726FFCE}
{AB9207B9-B134-4C57-B7ED-F3DCF7B0DC5F} = {0141526B-F257-4574-8CBE-99634726FFCE}
{12FB912C-43F8-40FE-BD7F-B52F589A1EBC} = {0141526B-F257-4574-8CBE-99634726FFCE}
{2BFE4D88-6F32-4701-887A-1DE3D7626DBB} = {0141526B-F257-4574-8CBE-99634726FFCE}
{EC780385-7580-4D15-914B-1D878A295CBC} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
EndGlobalSection
EndGlobal

View file

@@ -586,7 +586,6 @@ CNTK_SRC =\
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptTest.cpp \
$(SOURCEDIR)/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp \
$(SOURCEDIR)/Common/BestGpu.cpp \
$(SOURCEDIR)/Common/MPIWrapper.cpp \

View file

@@ -209,7 +209,8 @@ template <typename ElemType>
void DoWriteOutput(const ConfigParameters& config)
{
ConfigParameters readerConfig(config(L"reader"));
readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
// Why?
//readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
readerConfig.Insert("randomize", "None"); // we don't want randomization when output results
DataReader testDataReader(readerConfig);

View file

@@ -73,14 +73,16 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
size_t i = 0;
auto tensorShape = ProcessTensorShapeParameters(node, params, i, /*isImage=*/false, cnNodeType);
wstring dynamicAxis = node->GetOptionalParameter("dynamicAxis", "");
// TODO: Map dynamicAxis from name to node at this point, where that node is memoized inside NDL.
// first look for this node already existing in the network
// BUGBUG: How does this set the dimensions then?
if (m_net->NodeNameExists(name))
nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
else if (isSparse)
nodePtr = builder.CreateSparseInputNode(name, tensorShape);
nodePtr = builder.CreateSparseInputNode(name, tensorShape, dynamicAxis);
else
nodePtr = builder.CreateInputNode(name, tensorShape);
nodePtr = builder.CreateInputNode(name, tensorShape, dynamicAxis);
}
}
else if (cnNodeType == L"ImageInput" || cnNodeType == L"SparseImageInput")
@@ -97,11 +99,12 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
size_t imageHeight = ((NDLNode<ElemType>*) params[1])->GetScalar();
size_t imageChannels = ((NDLNode<ElemType>*) params[2])->GetScalar();
ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
wstring dynamicAxis = node->GetOptionalParameter("dynamicAxis", "");
if (isSparse)
nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), dynamicAxis);
else
nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), dynamicAxis);
}
}
else if (OperationNameOf(LearnableParameter) == cnNodeType || cnNodeType == L"ImageParameter")

View file

@@ -34,10 +34,12 @@ Parameter = LearnableParameter // deprecated
# TODO: make Parameter take tensor dims?
ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, init = 'fromLiteral', initFromLiteral = literal, learningRateMultiplier = 0.0)
Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
DynamicAxis(tag='') = new ComputationNode [ operation = 'DynamicAxis' ; /*plus the function args*/ ]
Input(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
# TODO: change from dynamicAxis by name to dynamicAxis being an actual object
SparseInput(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
EnvironmentInput(propertyName, tag='') = new ComputationNode [ operation = 'EnvironmentInput' /*plus the function args*/ ]
ConstantTensor(val, dims, tag='') = ParameterTensor(dims, learningRateMultiplier = 0, init = 'fixedValue', value = val)
Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, learningRateMultiplier = 0, init = 'fixedValue', value = val)
@@ -79,8 +81,9 @@ Transpose(x) = TransposeDimensions(x, 1, 2)
Times(A, B, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( A : B ) /*plus the function args*/ ]
Logistic(label, probability, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability) /*plus the function args*/ ]
WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability : instanceWeight) /*plus the function args*/ ]
ReconcileMBLayout(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileMBLayout' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
CastAs (type, data) = ReconcileMBLayout (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileDynamicAxis' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
ReconcileMBLayout = ReconcileDynamicAxis # back compat
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]
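Editor's note: a hedged sketch of how the new dynamicAxis parameter above reaches C++. The axis name L"q", the builder variable, and the net it wraps are illustrative assumptions, not from this commit; see the ComputationNetworkBuilder changes further down for the actual signatures.

// BS:  q = DynamicAxis() ; features = Input(512, dynamicAxis='q')
// maps, via the NDL/BS evaluators, onto the extended input factory. An empty
// axis name (the default) keeps the legacy single network-wide layout.
ComputationNetworkBuilder<float> builder(net); // assumed pre-existing network 'net'
auto features = builder.CreateInputNode(L"features", TensorShape(512), /*dynamicAxisName=*/L"q");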

View file

@@ -1,134 +0,0 @@
#if 0 // this entire file can be removed once CNTK.core.bs works
// ExperimentalNetworkBuilder.cpp -- interface to new version of NDL (and config) parser --fseide
#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
#include <string>
using namespace std;
// TODO: move to actual text files to be included
wstring standardFunctions =
L"Print(value, format='') = new PrintAction [ what = value /*; how = format*/ ] \n"
L"Debug(value, say = '', enabled = true) = new Debug [ /*macro arg values*/ ] \n"
L"Format(value, format) = new StringFunction [ what = 'Format' ; arg = value ; how = format ] \n"
L"Replace(s, from, to) = new StringFunction [ what = 'Replace' ; arg = s ; replacewhat = from ; withwhat = to ] \n"
L"Substr(s, begin, num) = new StringFunction [ what = 'Substr' ; arg = s ; pos = begin ; chars = num ] \n"
L"Chr(c) = new StringFunction [ what = 'Chr' ; arg = c ] \n"
L"Floor(x) = new NumericFunction [ what = 'Floor' ; arg = x ] \n"
L"Length(x) = new NumericFunction [ what = 'Length' ; arg = x ] \n"
L"Ceil(x) = -Floor(-x) \n"
L"Round(x) = Floor(x+0.5) \n"
L"Sign(x) = if x > 0 then 1 else if x < 0 then -1 else 0 \n"
L"Min(a,b) = if a < b then a else b \n"
L"Max(a,b) = if a > b then a else b \n"
L"Fac(n) = if n > 1 then Fac(n-1) * n else 1 \n";
wstring commonMacros =
L"BFF(in, rows, cols) = [ B = Parameter(rows, 1, init = 'fixedValue', value = 0) ; W = Parameter(rows, cols) ; z = W*in+B ] \n"
L"SBFF(in, rows, cols) = [ Eh = Sigmoid(BFF(in, rows, cols).z) ] \n "
L"MeanVarNorm(feat) = PerDimMeanVarNormalization(feat, Mean(feat), InvStdDev(feat)) \n"
L"LogPrior(labels) = Log(Mean(labels)) \n";
wstring computationNodes = // TODO: use actual TypeName() here? would first need to make it a wide string; we should also extract those two methods into the base macro
L"LearnableParameter(rows, cols, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]\n"
L"Parameter = LearnableParameter // deprecated \n"
L"ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ImageParameter?
// ^^ already works; vv untested
L"Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
L"SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n"
L"ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]\n"
L"SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]\n"
L"Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, learningRateMultiplier = 0, init = 'fixedValue', value = val) \n"
L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
// TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
L"Shift(input, fromOffset, boundaryValue, boundaryMode=-1/*context*/, dim=-1, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) /*plus the function args*/ ]\n"
L"RowSlice(startIndex, numRows, input, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
L"RowRepeat(input, numRepeats, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
L"Reshape(input, numRows, imageWidth = 0, imageHeight = 0, imageChannels = 0, tag='') = new ComputationNode [ operation = 'LegacyReshape' ; inputs = input /*plus the function args*/ ]\n"
L"NewReshape(input, dims, beginDim=0, endDim=0, tag='') = new ComputationNode [ operation = 'Reshape' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
L"ReshapeDimension(x, dim, tensorShape) = NewReshape(x, tensorShape, beginDim=dim, endDim=dim + 1) \n"
L"FlattenDimensions(x, dim, num) = NewReshape(x, 0, beginDim=dim, endDim=dim + num) \n"
L"SplitDimension(x, dim, N) = ReshapeDimension(x, dim, 0:N) \n"
L"TransposeDimensions(input, dim1, dim2, tag='') = new ComputationNode [ operation = 'TransposeDimensions' ; inputs = input /*plus the function args*/ ]\n"
L"Transpose(x) = TransposeDimensions(x, 1, 2)\n"
L"Times(A, B, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( A : B ) /*plus the function args*/ ]\n"
// TODO: Logistic should be generated with with BinaryStandardNode macro below.
L"Logistic(label, probability, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability) /*plus the function args*/ ]\n"
L"WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability : instanceWeight) /*plus the function args*/ ]\n"
L"ReconcileMBLayout(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileMBLayout' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]\n"
L"Convolution(weightNode, inputValueNode, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding = false, maxTempMemSizeInSamples = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode) /*plus the function args*/ ]\n"
L"MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]\n"
L"AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = input /*plus the function args*/ ]\n"
// TODO: define DelayedValue, with negative delay for future; cannot do this yet, need to be able to say something like delay = -(^.delay)
// aliases
L"ColumnwiseCrossProduct = KhatriRaoProduct // deprecated \n" // TODO: should it be deprecated? It is described as easier to understand in the CNTKBook.
L"ClassificationError = ErrorPrediction \n"
L"Delay = PastValue \n" // TODO: should it allow negative offsets and an if test here?
L"BatchNormalization(input, scale, bias, runMean, runInvStdDev, eval, spatial, normalizationTimeConstant = 0, epsilon = 0.00001, useCntkEngine = true, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runInvStdDev) /*plus the function args*/ ]\n"
// standard nodes. We use macros to define these strings.
#define UnaryStandardNode(Op, a) L## #Op L"(" L## #a L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = " L## #a L" /*plus the function args*/ ]\n"
#define BinaryStandardNode(Op, a, b) L## #Op L"(" L## #a L", " L## #b L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L") /*plus the function args*/ ]\n"
#define TernaryStandardNode(Op, a, b, c) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L") /*plus the function args*/ ]\n"
#define QuaternaryStandardNode(Op, a, b, c, d) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", " L## #d L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L" : " L## #d L") /*plus the function args*/ ]\n"
#ifdef COMING_SOON
TernaryStandardNode(CRF, labelVectorSequence, positionDependenScoreVectorSequence, transitionScores) // TODO: better names
#endif
UnaryStandardNode(Abs, x)
QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
// BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
//BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
UnaryStandardNode(Cosine, x)
BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
UnaryStandardNode(Dropout, activationVectorSequence)
//BinaryStandardNode(DummyCriterionNode)
BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
UnaryStandardNode(Exp, x)
QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
UnaryStandardNode(InvStdDev, dataVectorSequence)
BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
//BinaryStandardNode(LSTMNode)
UnaryStandardNode(Log, x)
UnaryStandardNode(LogSoftmax, z)
//BinaryStandardNode(LookupTableNode)
UnaryStandardNode(MatrixL1Reg, matrix)
UnaryStandardNode(MatrixL2Reg, matrix)
// BUGBUG: CNTKBook also mentions L1Norm and L2Norm
UnaryStandardNode(Mean, dataVectorSequence)
BinaryStandardNode(Minus, leftMatrix, rightMatrix)
UnaryStandardNode(Negate, input)
TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
BinaryStandardNode(Plus, leftMatrix, rightMatrix)
UnaryStandardNode(RectifiedLinear, z)
//BinaryStandardNode(RowElementTimesNode)
BinaryStandardNode(Scale, scalarScalingFactor, matrix)
#ifdef COMING_SOON
//BinaryStandardNode(SequenceDecoderNode)
#endif
UnaryStandardNode(Sigmoid, z)
UnaryStandardNode(Softmax, z)
UnaryStandardNode(Hardmax, z)
BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
UnaryStandardNode(SumColumnElements, z)
UnaryStandardNode(SumElements, matrix)
UnaryStandardNode(Tanh, z)
UnaryStandardNode(TimeReverse, vectorSequence)
BinaryStandardNode(TransposeTimes, leftMatrix, rightMatrix)
// those nodes are deprecated, we won't implement them in BS:
//BinaryStandardNode(NoiseContrastiveEstimationNode)
//BinaryStandardNode(ParallelNode)
//BinaryStandardNode(StrideTimesNode)
;
#endif
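Editor's note: the UnaryStandardNode/BinaryStandardNode macros in the deleted file build BrainScript source text out of C preprocessor stringizing (#) and token pasting (##). A minimal self-contained sketch of that mechanism, independent of CNTK:

#include <iostream>
#include <string>
// #Op stringizes the argument ("Abs"); L ## "Abs" pastes into the wide literal L"Abs";
// adjacent wide literals then concatenate into one line of BrainScript text.
#define UNARY(Op, a) L## #Op L"(" L## #a L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = " L## #a L" ]\n"
int main()
{
    std::wstring line = UNARY(Abs, x);
    std::wcout << line; // prints: Abs(x, tag='') = new ComputationNode [ operation = 'Abs' ; inputs = x ]
}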

View file

@@ -205,7 +205,6 @@
<ClCompile Include="BrainScript\BrainScriptEvaluator.cpp" />
<ClCompile Include="BrainScript\BrainScriptParser.cpp" />
<ClCompile Include="BrainScript\BrainScriptTest.cpp" />
<ClCompile Include="BrainScript\ExperimentalNetworkBuilder.cpp" />
<ClCompile Include="CNTK.cpp" />
<ClCompile Include="ModelEditLanguage.cpp" />
<ClCompile Include="stdafx.cpp" />

View file

@@ -44,9 +44,6 @@
<ClCompile Include="BrainScript\BrainScriptTest.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="BrainScript\ExperimentalNetworkBuilder.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
<Filter>Common</Filter>
</ClCompile>

View file

@@ -31,6 +31,34 @@ static const char* GetReaderName(const string& precision)
InvalidArgument("DataReader: The 'precision' parameter must be 'float' or 'double'.");
}
void DataReaderBase::SetMinibatchLayout(StreamMinibatchInputs& minibatch)
{
assert(minibatch.begin() != minibatch.end());
auto& pMBLayout = minibatch.begin()->second.pMBLayout;
// This is only allowed for old readers, which support a single layout for all inputs.
for (const auto& iter : minibatch)
{
assert(iter.second.pMBLayout == pMBLayout);
// TODO: This should be a runtime check, not an assert() that only runs in Debug.
UNUSED(iter);
}
CopyMBLayoutTo(pMBLayout);
}
bool DataReaderBase::GetMinibatch(StreamMinibatchInputs& minibatch)
{
if (TryGetMinibatch(minibatch))
{
SetMinibatchLayout(minibatch);
return true;
}
return false;
}
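Editor's note: GetMinibatch/TryGetMinibatch is a textbook template-method split. Below is a self-contained model of the contract; Layout, Input, Inputs are simplified stand-ins for CNTK's MBLayout/StreamMinibatchInputs, and LegacyReader is hypothetical.

#include <cassert>
#include <map>
#include <memory>
#include <string>

struct Layout { };                                    // stand-in for MBLayout
struct Input  { std::shared_ptr<Layout> pMBLayout; }; // stand-in for one input stream
using Inputs = std::map<std::wstring, Input>;

class ReaderBase // models DataReaderBase
{
protected:
    virtual bool TryGetMinibatch(Inputs& mb) = 0; // old readers only fill data
    void SetMinibatchLayout(Inputs& mb)           // the base class handles the layout
    {
        assert(!mb.empty());
        auto& pMBLayout = mb.begin()->second.pMBLayout;
        for (const auto& iter : mb)               // old readers: one layout for all inputs
            assert(iter.second.pMBLayout == pMBLayout);
        (void)pMBLayout;                          // the real code copies it via CopyMBLayoutTo()
    }
public:
    bool GetMinibatch(Inputs& mb)
    {
        if (!TryGetMinibatch(mb))
            return false;
        SetMinibatchLayout(mb);
        return true;
    }
};

struct LegacyReader : ReaderBase // hypothetical old-style reader
{
    bool TryGetMinibatch(Inputs& mb) override
    {
        auto shared = std::make_shared<Layout>();
        for (auto& kv : mb)
            kv.second.pMBLayout = shared;         // single shared layout, as required
        return true;
    }
};

int main()
{
    LegacyReader r;
    Inputs mb = { { L"features", Input{} }, { L"labels", Input{} } };
    return r.GetMinibatch(mb) ? 0 : 1;
}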
template <class ConfigRecordType>
void DataReader::InitFromConfig(const ConfigRecordType& /*config*/)
{

View file

@@ -59,28 +59,29 @@ public:
/*const*/ TensorShape sampleLayout;
// constructor
Input(MatrixBasePtr matrix, MBLayoutPtr pMBLayout, TensorShape sampleLayout) : matrix(matrix), pMBLayout(pMBLayout), sampleLayout(sampleLayout)
Input(MatrixBasePtr matrix, MBLayoutPtr pMBLayout, TensorShape sampleLayout) :
matrix(matrix), pMBLayout(pMBLayout), sampleLayout(sampleLayout)
{
assert(matrix);
}
Input(){} // some STL classes need this for general happiness
Input() {} // some STL classes need this for general happiness
// helper for typecasting the matrix pointer
template<class ElemType>
template<class ElemType>
Matrix<ElemType>& GetMatrix(const wchar_t* name/*for debugging only*/ = L"(unknown)") const
{
{
assert(matrix);
auto* matrixp = dynamic_cast<Matrix<ElemType>*>(matrix.get());
if (!matrixp)
{
// print a rather rich error to track down a regression failure
if (!matrixp)
{
// print a rather rich error to track down a regression failure
auto isFloat = !!dynamic_cast<Matrix<float>*> (matrix.get());
auto isDouble = !!dynamic_cast<Matrix<double>*>(matrix.get());
LogicError("GetMatrix<%s>: Attempted to access input stream '%ls' with wrong precision, got %s {%d,%d} instead of %s.",
typeid(ElemType).name(), name, typeid(matrix.get()).name(), (int)isFloat, (int)isDouble, typeid(Matrix<ElemType>*).name());
}
return *matrixp;
}
return *matrixp;
}
};
private:
@@ -246,6 +247,21 @@ typedef std::shared_ptr<IDataReader> IDataReaderPtr;
extern "C" DATAREADER_API void GetReaderF(IDataReader** preader);
extern "C" DATAREADER_API void GetReaderD(IDataReader** preader);
// The sole purpose of this base class is to provide backwards compatibility for (old)
// readers that do not support multiple mb layouts.
class DataReaderBase : public IDataReader
{
protected:
// Verifies that all inputs share the same layout (have the same layout pointer)
// and copies the provided layout into the minibatch layout.
// This method is needed for backwards-compatibility and only meant to be used by old readers!
void SetMinibatchLayout(StreamMinibatchInputs& minibatch);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices) = 0;
public:
virtual bool GetMinibatch(StreamMinibatchInputs& matrices) override;
};
// Data Reader class
// interface for clients of the Data Reader
// mirrors the IDataReader interface, except the Init method is private (use the constructor)
@@ -292,7 +308,6 @@ class DataReader : public IDataReader, protected Plugin, public ScriptableObject
// NOTE: this destroys the object, and it can't be used past this point.
// The reason why this is not just a destructor is that it goes across a DLL boundary.
virtual void Destroy() override;
public:
// DataReader Constructor
// config - [in] configuration parameters for the datareader

View file

@@ -100,29 +100,29 @@ struct MBLayout
{
return seqId == other.seqId && s == other.s && tBegin == other.tBegin && tEnd == other.tEnd;
}
size_t GetNumTimeSteps() const
{
return (size_t)(tEnd - tBegin);
}
size_t GetNumTimeSteps() const { return (size_t)(tEnd - tBegin); }
};
// -------------------------------------------------------------------
// construction
// -------------------------------------------------------------------
MBLayout(size_t numParallelSequences, size_t numTimeSteps)
MBLayout(size_t numParallelSequences, size_t numTimeSteps, const std::wstring &name)
: m_distanceToStart(CPUDEVICE), m_distanceToEnd(CPUDEVICE), m_columnsValidityMask(CPUDEVICE)
{
Init(numParallelSequences, numTimeSteps);
SetUniqueAxisName(name != L"" ? name : L"DynamicAxis");
}
MBLayout()
: MBLayout(1, 0)
: MBLayout(1, 0, L"")
{
}
// copy the content of another MBLayoutPtr over
// Use this instead of actual assignment to make it super-obvious that this is not copying the pointer but actual content. The pointer is kept fixed.
void CopyFrom(const MBLayoutPtr& other)
// Use "keepName" if the "identity" of the target is to be preserved, e.g.
// while copying from reader space to network space.
void CopyFrom(const MBLayoutPtr& other, bool keepName=false)
{
m_numTimeSteps = other->m_numTimeSteps;
m_numParallelSequences = other->m_numParallelSequences;
@@ -141,7 +141,8 @@ struct MBLayout
m_columnsValidityMask.SetValue(other->m_columnsValidityMask);
m_writable = other->m_writable;
m_axisName = other->m_axisName;
if (!keepName)
m_axisName = other->m_axisName;
}
// Destructive copy that steals ownership of the content, like std::move()
@@ -275,7 +276,7 @@ public:
}
// return all sequences stored in this minibatch
const vector<SequenceInfo> &GetAllSequences() const
const vector<SequenceInfo>& GetAllSequences() const
{
return m_sequences;
}
@@ -287,7 +288,7 @@ public:
const Matrix<char>& GetColumnsValidityMask(DEVICEID_TYPE deviceId) const;
// compare whether two layouts are the same
bool operator==(const MBLayout &other) const
bool operator==(const MBLayout& other) const
{
if (this == &other)
return true;
@@ -441,8 +442,8 @@ public:
bool HasGaps(const FrameRange &fr) const;
// test boundary flags for a specific condition
bool IsBeyondStartOrEnd(const FrameRange &fr) const;
bool IsGap(const FrameRange &fr) const;
bool IsBeyondStartOrEnd(const FrameRange& fr) const;
bool IsGap(const FrameRange& fr) const;
// test whether at least one sequence crosses the bounds of this minibatch
bool HasSequenceBeyondBegin() const
@@ -555,7 +556,7 @@ private:
// Meant to guard in lazy creation of m_columnsValidityMask.
mutable bool m_writable;
// the axis
// The axis this MBLayout represents.
// For now only a string meant for debugging.
std::wstring m_axisName;
@@ -751,6 +752,7 @@ inline bool MBLayout::HasGaps() const
{
return m_numGapFrames > 0; /*HasGaps(FrameRange());*/
}
inline bool MBLayout::HasGaps(const FrameRange &fr) const
{
CheckIsValid();
@@ -828,7 +830,7 @@ inline size_t MBLayout::GetActualNumSamples() const { return m_numFramesDeclared
// only called from MaskMissingColumnsTo()
// TODO: Can probably be faster by using the sequence array directly.
// TODO: Or should we just blast m_distanceToStart to GPU, and mask based on that? It is small compared to features.
inline const Matrix<char> &MBLayout::GetColumnsValidityMask(DEVICEID_TYPE deviceId) const
inline const Matrix<char>& MBLayout::GetColumnsValidityMask(DEVICEID_TYPE deviceId) const
{
CheckIsValid();
// lazily compute the validity mask
@@ -947,7 +949,7 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCol
// MBLayout of data and of FrameRange must be identical pointers,
// or in case of broadcasting, respective parent pointers.
// MBLayouts that are identical in content but not object identity (pointer) are not admissible.
// For those cases, use a ReconcileMBLayout node.
// For those cases, use a ReconcileDynamicAxis node.
if (fr.m_pMBLayout != pMBLayout)
{
// if broadcast allowed then it is allowed to broadcast from an outer-loop value
@@ -955,9 +957,9 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCol
if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
return std::pair<size_t, size_t>(0, numCols);
if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation?");
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
else
LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix.");
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix.");
}
// if FrameRange refers to whole minibatch (map mode)
// or if we don't even have a layout
@@ -1040,7 +1042,7 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayou
// MBLayout of data and of FrameRange must be identical pointers,
// or in case of broadcasting, respective parent pointers.
// MBLayouts that are identical in content but not object identity (pointer) are not admissible.
// For those cases, use a ReconcileMBLayout node.
// For those cases, use a ReconcileDynamicAxis node.
if (isTimeIteration && fr.m_pMBLayout != pMBLayout)
{
// if broadcast allowed then it is allowed to broadcast from an outer-loop value
@@ -1048,10 +1050,10 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayou
if (fr.m_pMBLayout /*get data for a loop*/ && !pMBLayout /*'data' is not samples*/ && fr.m_broadcastAllowed /*we're OK with that*/)
; // the time dimension is broadcasting--leave it as is
else if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation? %s vs. %s",
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
else
LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix: %s vs. %s",
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix: %s vs. %s",
static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
}
// if FrameRange refers to whole minibatch (map mode)
@@ -1123,8 +1125,10 @@ static inline void MaskMissingColumnsTo(Matrix<ElemType>& matrixToMask, const MB
TensorView<ElemType>(matrixSliceToMask).DoMaskNegativeOf(0, TensorView<ElemType>(matrixSliceToMask), TensorView<ElemType>(maskSlice), 1); val;
#else
const auto& maskMatrix = pMBLayout->GetColumnsValidityMask(matrixToMask.GetDeviceId());
maskMatrix.TransferToDeviceIfNotThere(matrixToMask.GetDeviceId(), /*ismoved=*/ false, /*emptyTransfer=*/ false, /*updatePreferredDevice=*/ false);
auto maskSlice = DataWithMBLayoutFor(maskMatrix, fr, pMBLayout);
auto matrixSliceToMask = DataWithMBLayoutFor(matrixToMask, fr, pMBLayout);
matrixSliceToMask.MaskColumnsValue(maskSlice, val);
#endif
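Editor's note: a fragment illustrating the two additions in this file, the named constructor and the keepName copy. It assumes the MBLayout declaration above and is not standalone.

auto readerLayout  = make_shared<MBLayout>(1, 0, L"myAxis"); // explicit axis name
auto networkLayout = make_shared<MBLayout>(1, 0, L"");       // L"" falls back to L"DynamicAxis"
// copy frames/sequences from the reader side, but keep the target's own name:
networkLayout->CopyFrom(readerLayout, /*keepName=*/true);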

View file

@@ -79,7 +79,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// - special case: swapping between sample and MBLayout, e.g. turn a sample dimension to a time dimension
// - Validate() stage will automatically infer tensor dimensions from inputs, and also infer downwards into LearnableParameters where requested
//
// Interfacing to and inplementation in Matrix lib:
// Interfacing to and implementation in Matrix lib:
// - a Tensor is realized as a type TensorView = { Matrix&, TensorShape& } (i.e. tensors don't own their memory)
// - Matrix lib will contain overloads for relevant operations that take Tensor& instead of Matrix&.
// - elementwise ops will go through a single bottleneck function that deals with matching dimensions (extend, broadcast) and flattening

View file

@@ -50,13 +50,14 @@ public:
ComputationNetwork() :
m_randomSeedOffset(0),
m_isCompiled(false),
m_areMatricesAllocated(false),
m_pMBLayoutOfNetwork(make_shared<MBLayout>()),
m_isCompiled(false),
m_areMatricesAllocated(false),
m_pMBLayoutOfNetwork(make_shared<MBLayout>(1, 0, L"*")),
m_environment(make_shared<ComputationEnvironment>())
{
m_pMBLayoutOfNetwork->SetAxisName(L"T");
//m_pMBLayoutOfNetwork->SetAxisName(L"T");
}
ComputationNetwork(DEVICEID_TYPE deviceId)
: ComputationNetwork()
{
@@ -289,6 +290,8 @@ public:
// This returns max number of columns over the feature nodes.
// Note that if we have multiple slices, MB size != #frames.
// BUGBUG: This will break once we have inconsistent layouts.
// BUGBUG: The number computed here is completely off (if the layout has gaps
// they will also be counted towards the actualMBSize)
size_t DetermineActualMBSizeFromFeatures() const
{
size_t actualMBSize = 0;

View file

@@ -48,6 +48,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(DiagTimesNode)) return New<DiagTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DropoutNode)) return New<DropoutNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DummyCriterionNode)) return New<DummyCriterionNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(DynamicAxisNode)) return New<DynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ElementTimesNode)) return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);
@@ -75,7 +76,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(PerDimMeanVarDeNormalizationNode)) return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PassNode)) return New<PassNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PlusNode)) return New<PlusNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReconcileMBLayoutNode)) return New<ReconcileMBLayoutNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReconcileDynamicAxisNode)) return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReciprocalNode)) return New<ReciprocalNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RectifiedLinearNode)) return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReshapeNode)) return New<ReshapeNode<ElemType>>(forward<_Types>(_Args)...);
@@ -110,6 +111,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
// TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
else if (nodeType == L"PerDimMeanVarNormalizationNode") return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"PerDimMeanVarDeNormalizationNode") return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"ReconcileMBLayout") return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"RowElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"RowSlice") return New<SliceNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == L"Scale") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
@@ -193,28 +195,29 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
return net.AddNodeToNetWithElemType(New<LearnableParameter<ElemType>>(net.GetDeviceId(), paramName, tensorShape));
}
// TODO: change these to take an actual object instead of a name for dynamicAxis
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const size_t rows)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName)
{
return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows, dynamicAxisName));
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const size_t rows)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName)
{
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows, dynamicAxisName));
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName)
{
return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, sampleLayout));
return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, sampleLayout, dynamicAxisName));
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const TensorShape& imageLayout)
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const TensorShape& imageLayout, const wstring& dynamicAxisName)
{
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, dynamicAxisName));
}
template <class ElemType>
@@ -461,6 +464,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Pass(
return net.AddNodeToNetAndAttachInputs(New<PassNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::InvStdDev(const ComputationNodePtr a, const std::wstring nodeName)
{

View file

@@ -48,10 +48,10 @@ public:
ComputationNodePtr CreateLearnableParameter(const std::wstring& paramName, const TensorShape& tensorShape);
// sparse matrix size is optionally specified
// ComputationNodePtr CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size = 0);
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const size_t rows);
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
@@ -108,6 +108,7 @@ public:
ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");

View file

@@ -517,9 +517,8 @@ void ComputationNetwork::DetermineSetOfAllRoots()
}
// initial setup of MBLayout pointers
// - link all input nodes to one or more MBLayouts --TODO: Currently only one
// - link all input nodes to one or more MBLayouts
// - reset all others to nullptr, in expectation of a ValidateNetwork() pass
// BUGBUG (Issue #95): Change this to use different MBLayouts for different inputs if so configured.
void ComputationNetwork::ResetMBLayouts()
{
// reset to a well-defined MBLayout (any meaningful layout should do here)
@@ -530,10 +529,42 @@ void ComputationNetwork::ResetMBLayouts()
for (const auto& node : GetAllNodesForRoot(nullptr))
node->LinkToMBLayout(nullptr);
// then fix up inputs (all others get propagated upwards through Validate())
// BUGBUG (Issue #95): Once we support mismatching layouts, this will be more involved. For now, everything shares the one layout that the Network knows about.
// DynamicAxis nodes are (apart from the soon-to-be-deprecated network-wide MBLayout) the main holders of MBLayouts. Initialize them.
// The only other instances are nodes that change the MBLayout, like WhereNode.
for (auto node : GetNodesWithType(L"DynamicAxis"))
node->LinkToMBLayout(make_shared<MBLayout>(1, 0, node->GetName()));
// This is now initialized inside of the Input nodes, with the proper connections.
for (auto node : InputNodes(nullptr))
node->LinkToMBLayout(m_pMBLayoutOfNetwork);
{
// TODO: use if (!Is<ITakesDynamicAxis>(node))...
auto n = dynamic_pointer_cast<ITakesDynamicAxis>(node);
if (!n)
LogicError("Expected %ls to implement ITakesDynamicAxis, but it doesn't.", node->NodeDescription().c_str());
std::wstring axisName = n->GetRequestedDynamicAxis();
if (axisName == L"")
{
// Legacy behavior: One shared MBLayout
// TODO Remove m_pMBLayoutOfNetwork altogether. See issue 358.
node->LinkToMBLayout(m_pMBLayoutOfNetwork);
}
else
{
auto axisNode = GetNodeFromName(axisName);
if (!axisNode)
RuntimeError("%ls: Can't find node '%ls' for retrieving dynamic axis.", axisNode->NodeDescription().c_str(), axisName.c_str());
// For now we require the node to be a DynamicAxisNode, though we could derive the same from other nodes. This would involve
// more dependencies on the order in which things are evaluated, though.
if (axisNode->OperationName() != L"DynamicAxis")
RuntimeError("%ls: dynamicAxis argument must be of type DynamicAxis(), but got %ls.", node->NodeDescription().c_str(), axisNode->NodeDescription().c_str());
if (!axisNode->HasMBLayout())
LogicError("%ls: Expected %ls to have MBLayout, but it doesn't.", node->NodeDescription().c_str(), axisNode->NodeDescription().c_str());
node->LinkToMBLayout(axisNode->GetMBLayout());
}
}
}
// -----------------------------------------------------------------------
@@ -661,6 +692,11 @@ size_t ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, boo
{
hasVisitedChild |= child->m_visited; // if not a single visited child then no point in validating
allChildrenVisited &= child->m_visited;
// Make sure we don't use DynamicAxis in places where it was not designed for.
// This is a stop-gap. We need a more coherent concept for passing of shapes.
if (child->OperationName() == L"DynamicAxis")
RuntimeError("%ls: Cannot be used as input to another node. It can only be used on the 'dynamicAxis' property of an Input node.", child->NodeDescription().c_str());
}
// if there is not at least one visited child

View file

@@ -100,7 +100,7 @@ void ComputationNodeBase::InferMBLayoutFromInputsForStandardCase(bool isFinalVal
else if (!pMBLayout) // first non-NULL layout: just copy it
pMBLayout = child->m_pMBLayout;
else if (pMBLayout != child->m_pMBLayout && isFinalValidationPass) // got a layout--compare whether it is the same
RuntimeError("%ls: InferMBLayoutFromInputsForStandardCase: Expected minibatch layouts to be the same between all children. Child '%ls' (%ls) uses a different layout than previously checked children and might get out of sync during runtime. If this is by design, use ReconcileMBLayout() to forward layouts between nodes.",
RuntimeError("%ls: InferMBLayoutFromInputsForStandardCase: Expected minibatch layouts to be the same between all children. Child '%ls' (%ls) uses a different layout than previously checked children and might get out of sync during runtime. If this is by design, use ReconcileDynamicAxis() to forward layouts between nodes.",
NodeDescription().c_str(), child->NodeName().c_str(), child->OperationName().c_str());
}
// all are consistent: install it
@@ -130,7 +130,7 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
if (isFinalValidationPass &&
Input(0)->GetMBLayout() != Input(1)->GetMBLayout() && Input(0)->HasMBLayout() && Input(1)->HasMBLayout())
{
LogicError("%ls: Minibatch layouts are not the same between arguments and might get out of sync during runtime. If this is by design, use ReconcileMBLayout() to forward layouts between nodes.", NodeDescription().c_str());
LogicError("%ls: Minibatch layouts are not the same between arguments and might get out of sync during runtime. If this is by design, use ReconcileDynamicAxis() to forward layouts between nodes.", NodeDescription().c_str());
}
// result has tensor shape with dimensions being the max over both
@@ -176,6 +176,7 @@ void ComputationNodeBase::ValidateBinaryReduce(bool isFinalValidationPass)
ComputationNodeBase::Validate(isFinalValidationPass);
m_pMBLayout = nullptr; // this node does not hold mini-batch data
ValidateInferBinaryInputDims();
if (isFinalValidationPass)
{
if (!(Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(1)->GetSampleLayout())))
@@ -338,18 +339,25 @@ TensorShape ComputationNodeBase::GetOneSampleTensorSliceFor(size_t rank, const F
prototype += "NULL";
continue;
}
prototype += msra::strfun::strprintf("[%s%ls]", string(child->m_sampleLayout).c_str(), child->GetMBLayoutAxisString().c_str());
prototype += child->ShapeDescription().c_str();
}
prototype += extraArgs;
//prototype += ")";
}
prototype += msra::strfun::strprintf(" -> [%s%ls]", string(GetSampleLayout()).c_str(), GetMBLayoutAxisString().c_str());
prototype += msra::strfun::strprintf(" -> %s", ShapeDescription().c_str());
return prototype;
}
const std::string ComputationNodeBase::ShapeDescription() const
{
return msra::strfun::strprintf("[%s%s%ls]",
string(m_sampleLayout).c_str(),
HasMBLayout() ? " x " : "",
HasMBLayout() ? GetMBLayout()->GetAxisName() : L"");
}
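Editor's note: a hypothetical trace line under the new format, assuming the network default axis name "*" set in the ComputationNetwork constructor earlier in this diff:

// ShapeDescription() renders "[512]" for a node without an MBLayout (no " x " part)
// and "[512 x *]" for one bound to the axis named "*", so a prototype now reads:
//   Plus(a[512 x *], b[512]) -> [512 x *]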
template <class ElemType>
/*virtual*/ void ComputationNode<ElemType>::DumpNodeInfo(const bool /*printValues*/, const bool printMetadata, File& fstream) const
{

View file

@@ -36,7 +36,8 @@
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // Batch norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_7
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_8
extern bool g_shareNodeValueMatrices;
@@ -553,9 +554,14 @@ public:
// helper for the factory function for ComputationNodes
static vector<ComputationNodeBasePtr> GetInputsFromConfig(const ScriptableObjects::IConfigRecordPtr configp)
{
return GetInputsFromConfig(configp, L"inputs");
}
static vector<ComputationNodeBasePtr> GetInputsFromConfig(const ScriptableObjects::IConfigRecordPtr configp, const std::wstring& property)
{
vector<ComputationNodeBasePtr> inputs;
const auto* inputsArg = configp->Find(L"inputs");
const auto* inputsArg = configp->Find(property);
if (inputsArg)
{
if (inputsArg->Is<ComputationNodeBase>()) // single arg
@ -817,6 +823,9 @@ public:
return std::wstring(L"Node '") + NodeName().c_str() + L"' (" + OperationName().c_str() + L" operation)";
};
// Helper that returns [a x b x c], including dynamic axes.
const std::string ShapeDescription() const;
protected:
// -----------------------------------------------------------------------
@ -851,7 +860,8 @@ protected:
typedef ComputationNodeBase::ComputationNodeBasePtr ComputationNodeBasePtr;
// =======================================================================
// NumInputs -- little helper interface to allow derived Node classes to specify how many inputs they expect
// NumInputs -- little helper interface to allow derived Node classes to
// specify how many inputs they expect
// =======================================================================
struct INumInputs { virtual size_t GetExpectedNumInputs() const = 0; };
@ -864,6 +874,14 @@ struct NumInputs : public INumInputs // e.g. derive from NumInputs<2>
}
};
// =======================================================================
// Nodes that can take a dynamic axis need to implement this.
// =======================================================================
struct ITakesDynamicAxis
{
virtual const std::wstring GetRequestedDynamicAxis() const = 0;
};
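A node opts into the mechanism by reporting the axis name it was configured with. A minimal hypothetical implementer (illustration only; the one real implementer in this commit is InputValueBase below, and the other required ComputationNode overrides are elided here):

template <class ElemType>
class MyAxisAwareNode : public ComputationNode<ElemType>, public ITakesDynamicAxis
{
    std::wstring m_axisName; // hypothetical: filled from config
public:
    virtual const std::wstring GetRequestedDynamicAxis() const override { return m_axisName; }
    // (ForwardProp/BackpropTo and other pure-virtual members elided)
};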
// =======================================================================
// ComputationNode -- abstract base class for computation nodes, deriving
// from ComputationNodeBase, parameterized by float vs. double
@ -1004,7 +1022,7 @@ public:
if (inputs[i])
m_inputs[i] = DownCast(inputs[i]); // (DownCast() checks the type; the assignment then downcasts it again)
else
m_inputs[i] = nullptr; // during network creation, nullpts are possible
m_inputs[i] = nullptr; // during network creation, nullptrs are possible
}
protected:
@ -1406,7 +1424,7 @@ public:
virtual void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
{
if (IsValueSharable())
RequestMatrixFromPool(m_value, matrixPool);
RequestMatrixFromPool(m_value, matrixPool);
else
CreateMatrixIfNull(m_value);
}

View File

@ -108,6 +108,47 @@ public:
virtual void DumpNodeInfo(const bool printValues, const bool printMetadata, File& fstream) const override;
};
// -----------------------------------------------------------------------
// DynamicAxisNode (/*no input*/)
// This is a holder for MBLayout objects shared across inputs.
// -----------------------------------------------------------------------
template <class ElemType>
class DynamicAxisNode : public ComputationNode<ElemType>, public NumInputs<0>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"DynamicAxis"; }
public:
DynamicAxisNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
// BUGBUG: In BS, the node name is not known during node instantiation.
// This may require passing the display name as a separate parameter.
// This is the whole point of this class: Introduce a new MBLayout that others can use.
LinkToMBLayout(make_shared<MBLayout>(1, 0, name));
// We need some shape, or validation fails.
SetDims(TensorShape(1,1), true);
}
DynamicAxisNode(const ScriptableObjects::IConfigRecordPtr configp)
: DynamicAxisNode(configp->Get(L"deviceId"), L"<placeholder>")
{
}
virtual void /*ComputationNode::*/ ForwardProp(const FrameRange&) override
{
RuntimeError("%ls is a special node only to be used as input to the Input() node.", NodeDescription().c_str());
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange&)
{
LogicError("%ls is a leaf node. BackpropTo() should never be called.", NodeDescription().c_str());
}
};
template class DynamicAxisNode<float>;
template class DynamicAxisNode<double>;
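Putting the pieces together, a hedged sketch of the wiring this enables (deviceId is assumed to be in scope; in practice the wiring happens through BrainScript, as in the seqcla.cntk example later in this commit):

// Roughly what  t = DynamicAxis(); features = Input(1, dynamicAxis=t)  expands to:
auto axis = make_shared<DynamicAxisNode<float>>(deviceId, L"t");  // owns a fresh MBLayout named "t"
auto feat = make_shared<InputValue<float>>(deviceId, L"features",
                                           TensorShape(1), L"t"); // requests that axis by name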
// -----------------------------------------------------------------------
// InputValueBase (/*no input*/)
// Base class for InputValue and SparseInputValue (typically fed by a DataReader)
@ -116,12 +157,12 @@ public:
// -----------------------------------------------------------------------
template <class ElemType>
class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>
class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>, public ITakesDynamicAxis
{
typedef ComputationNode<ElemType> Base;
UsingComputationNodeMembers;
void Init(const TensorShape& sampleLayout, bool isSparse)
void Init(const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName)
{
m_isSparse = isSparse;
MarkValueNonSharable();
@ -131,33 +172,61 @@ class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>
SetDims(sampleLayout, HasMBLayout()); // also called when reloading a file. Then we have an MBLayout, otherwise not yet
UpdateFunctionValuesSize(); // we must allocate the matrix so that the readers get objects with valid row dimensions (some readers expect that)
SetLearningRateMultiplier(0);
m_dynamicAxisNodeName = axisName;
}
protected:
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, bool isSparse)
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName)
: Base(deviceId, name)
{
Init(sampleLayout, isSparse);
Init(sampleLayout, isSparse, axisName);
}
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, bool isSparse)
: InputValueBase(deviceId, name, TensorShape(rows), isSparse)
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, bool isSparse, const std::wstring axisName)
: InputValueBase(deviceId, name, TensorShape(rows), isSparse, axisName)
{
}
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, bool isSparse)
: InputValueBase(deviceId, name, TensorShape(), isSparse)
InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, bool isSparse, const std::wstring axisName)
: InputValueBase(deviceId, name, TensorShape(), isSparse, axisName)
{
}
InputValueBase(const ScriptableObjects::IConfigRecordPtr configp, bool isSparse)
: Base(configp->Get(L"deviceId"), L"<placeholder>")
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
wstring axisName = L"";
// TODO This currently reads a ComputationNode object from a property, thereby bypassing "normal" input handling.
// The passing of shapes represents a second graph that is "overlaid" on (and previously identical to) the data
// flow network. This needs to be solved on a more fundamental level.
// The proposed future change from fseide is as follows:
// (2) On BS level, dynamicAxis is an optional parameter that takes a DynamicAxis object--the alternative,
// passing a string, will be removed.
// (3) The dynamicAxis argument will become an actual m_inputs[] to the InputValue. I.e., InputValues are no
// longer leaves from the ComputationNetwork viewpoint. But they ARE leaves from the user / BS / NDL view, as
// the axis is not passed as a regular input. This way, the current special-casing can and will be removed;
// instead, the MBLayout propagation will happen automagically as part of regular ValidateNetwork().
if (configp->Exists(L"dynamicAxis"))
{
auto axisConfig = configp->Find(L"dynamicAxis");
if (axisConfig->Is<ComputationNodeBase>())
{
ComputationNodeBasePtr axis = configp->Get(L"dynamicAxis");
axisName = axis->GetName();
}
else
{
axisName = (const std::wstring&)*axisConfig;
}
}
bool isImage = configp->Get(L"isImage");
if (!isImage)
Init(configp->Get(L"shape"), isSparse);
Init(configp->Get(L"shape"), isSparse, axisName);
else
Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse);
Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse, axisName);
}
virtual const std::wstring GetRequestedDynamicAxis() const { return m_dynamicAxisNodeName; }
public:
virtual void Save(File& fstream) const override
{
@ -166,6 +235,10 @@ public:
size_t colsDummy = 0;
fstream << rowsDummy << colsDummy;
m_sampleLayout.Save(fstream);
unsigned int nrAxes = 1;
fstream << nrAxes;
fstream << m_dynamicAxisNodeName;
}
virtual void Load(File& fstream, size_t modelVersion) override
@ -180,10 +253,22 @@ public:
if (rows != 0 /*old file*/ && rows != sampleLayout.GetNumElements() /*even older file*/)
{
fprintf(stderr, "WARNING: %ls InputValue has inconsistent serialized sample layout %s vs. number of rows %d. Resetting sample layout to vector.\n",
NodeName().c_str(), string(sampleLayout).c_str(), (int) rows);
NodeName().c_str(), string(sampleLayout).c_str(), (int)rows);
sampleLayout = TensorShape(rows);
}
Init(sampleLayout, m_isSparse);
if (modelVersion >= CNTK_MODEL_VERSION_8)
{
unsigned int nrAxes;
fstream >> nrAxes;
if (nrAxes == 1)
fstream >> m_dynamicAxisNodeName;
else if (nrAxes > 1)
RuntimeError("Input node: This version only supports a single dynamic axis. Please update your bits.");
}
else
m_dynamicAxisNodeName = L""; // Use default
Init(sampleLayout, m_isSparse, m_dynamicAxisNodeName);
}
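Read together with Save() above, the version-8 serialized record for an Input node ends with the following fields (a sketch inferred from this code, not a format specification):

// size_t       rowsDummy             -- legacy, always 0
// size_t       colsDummy             -- legacy, always 0
// TensorShape  sampleLayout          -- via m_sampleLayout.Save()
// unsigned int nrAxes                -- new in CNTK_MODEL_VERSION_8; only 1 is supported
// wstring      m_dynamicAxisNodeName -- empty string selects the default axis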
// InputValue must not resize its inputs because that might destroy it. It should already have the correct size.
@ -216,6 +301,9 @@ public:
private:
bool m_isSparse = false;
std::wstring m_dynamicAxisNodeName;
ComputationNodeBase* m_dynamicAxisNode;
void ConvertToSparseMatrix()
{
m_value->SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC, false);
@ -237,15 +325,19 @@ class InputValue : public InputValueBase<ElemType>
public:
InputValue(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name, false)
: Base(deviceId, name, false, L"")
{
}
InputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows)
: Base(deviceId, name, rows, false)
InputValue(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName)
: Base(deviceId, name, false, dynamicAxisName)
{
}
InputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout)
: Base(deviceId, name, sampleLayout, false)
InputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, const wstring& dynamicAxisName)
: Base(deviceId, name, rows, false, dynamicAxisName)
{
}
InputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, const wstring& dynamicAxisName)
: Base(deviceId, name, sampleLayout, false, dynamicAxisName)
{
}
InputValue(const ScriptableObjects::IConfigRecordPtr configp)
@ -275,15 +367,19 @@ class SparseInputValue : public InputValueBase<ElemType>
public:
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name, true)
: Base(deviceId, name, true, L"")
{
}
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows)
: Base(deviceId, name, rows, true)
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName)
: Base(deviceId, name, true, dynamicAxisName)
{
}
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& imageLayout)
: Base(deviceId, name, imageLayout, true)
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, const wstring& dynamicAxisName)
: Base(deviceId, name, rows, true, dynamicAxisName)
{
}
SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& imageLayout, const wstring& dynamicAxisName)
: Base(deviceId, name, imageLayout, true, dynamicAxisName)
{
}
SparseInputValue(const ScriptableObjects::IConfigRecordPtr configp)

View File

@ -117,7 +117,7 @@ template <class ElemType>
if (!m_pMBLayout)
{
m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
m_pMBLayout->SetUniqueAxisName(NodeName());
m_pMBLayout->SetUniqueAxisName(L"WhereNodeAxis");
}
// we map scalars to scalars
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)
@ -157,6 +157,7 @@ template <class ElemType>
result(0, jIndex) = (ElemType)jSource;
}
}
// Note: maybe this is no longer needed, now that we do the same inside UpdateFunctionValueSize() for all nodes.
result.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
}

View File

@ -171,7 +171,7 @@ template class ReshapeNode<float>;
template class ReshapeNode<double>;
// -----------------------------------------------------------------------
// ReconcileMBLayout (dataInput, layoutInput)
// ReconcileDynamicAxis (dataInput, layoutInput)
// This node copies data from 'dataInput' while it propagates the minibatch-layout information from 'layoutInput'.
// It does perform a runtime check to enforce that the layout of 'dataInput' is compatible (identical content) to that of 'layoutInput'.
// This node is meant to be used from BrainScript macros that bracket expand/reduce pairs of nodes. It is not meant to really be used directly.
@ -179,14 +179,14 @@ template class ReshapeNode<double>;
// -----------------------------------------------------------------------
template <class ElemType>
class ReconcileMBLayoutNode : public ComputationNode<ElemType>, public NumInputs<2>
class ReconcileDynamicAxisNode : public ComputationNode<ElemType>, public NumInputs<2>
{
typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"ReconcileMBLayout"; }
static const std::wstring TypeName() { return L"ReconcileDynamicAxis"; }
public:
DeclareConstructorFromConfigWithNumInputs(ReconcileMBLayoutNode);
ReconcileMBLayoutNode(DEVICEID_TYPE deviceId, const wstring& name)
DeclareConstructorFromConfigWithNumInputs(ReconcileDynamicAxisNode);
ReconcileDynamicAxisNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}
@ -228,8 +228,8 @@ public:
}
};
template class ReconcileMBLayoutNode<float>;
template class ReconcileMBLayoutNode<double>;
template class ReconcileDynamicAxisNode<float>;
template class ReconcileDynamicAxisNode<double>;
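The node's Validate() lies outside the changed hunks; a hedged reconstruction of the semantics described in the comment above (sample shape from dataInput, dynamic axis from layoutInput):

// Hedged reconstruction, not code from this commit:
virtual void Validate(bool isFinalValidationPass) override
{
    Base::Validate(isFinalValidationPass);
    m_pMBLayout = Input(1)->GetMBLayout();               // adopt layoutInput's dynamic axis
    SetDims(Input(0)->GetSampleLayout(), HasMBLayout()); // keep dataInput's sample shape
}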
// -----------------------------------------------------------------------
// SliceNode (input)

View File

@ -141,6 +141,7 @@
<ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp" />
<ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp" />
<ClCompile Include="..\Common\Config.cpp" />
<ClCompile Include="..\Common\DataReader.cpp" />
<ClCompile Include="..\Common\Eval.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">true</ExcludedFromBuild>

View File

@ -32,6 +32,9 @@
<ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp">
<Filter>BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\Common\DataReader.cpp">
<Filter>Common</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="EvalReader.h" />

View File

@ -12,7 +12,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Evaluation Reader class
// interface to pass to evaluation DLL
template <class ElemType>
class EvalReader : public IDataReader
class EvalReader : public DataReaderBase
{
std::map<std::wstring, std::vector<ElemType>*>* m_inputs; // our input data
std::map<std::wstring, size_t>* m_dimensions; // the number of rows for the input data
@ -109,11 +109,11 @@ public:
m_mbSize = min(mbSize, m_recordCount);
}
// GetMinibatch - Get the next minibatch (features and labels)
// TryGetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix,
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
virtual bool GetMinibatch(StreamMinibatchInputs& matrices)
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// how many records are we reading this time
size_t recordCount = min(m_mbSize, m_recordCount - m_currentRecord);
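This reader, like most readers below, moves from IDataReader to DataReaderBase and renames GetMinibatch to TryGetMinibatch. DataReaderBase itself is not among the shown hunks; a hedged sketch of the presumed template-method split, inferred from the DataReaderHelpers comment later in this commit (GetMinibatch fills the input matrices and copies the MBLayout into them):

// Hedged reconstruction, not code from this commit:
class DataReaderBase : public IDataReader
{
public:
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices) override
    {
        if (!TryGetMinibatch(matrices))   // derived readers implement only this
            return false;
        // Presumed common post-step: propagate the reader's layout into each
        // input so that callers no longer call CopyMBLayoutTo() themselves.
        for (auto& input : matrices)
            CopyMBLayoutTo(input.second.pMBLayout);
        return true;
    }
protected:
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices) = 0;
};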

View File

@ -664,7 +664,7 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::DoGatherColumnsOf(ElemType beta, const
continue;
size_t jIn = (size_t)jInF;
if (jIn >= a.GetNumCols())
InvalidArgument("DoGatherColumnsOf: Map out of bounds.");
InvalidArgument("DoGatherColumnsOf: Map out of bounds. %ld >= %ld", (long int)jIn, (long int)a.GetNumCols());
ScaleAndAddColumn(beta, &us(0,jOut), &a(0,jIn), us.GetNumRows(), alpha);
}
@ -6091,7 +6091,7 @@ void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a,
if (reductionOp != ElementWiseOperator::opSum) // TODO: enable the reduction ops
InvalidArgument("TensorOp: Unary reduction operations other than opSum not yet implemented.");
// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
#define CaseUnaryTensorOp(oper) \
case ElementWiseOperator::op##oper: \
return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 2>& pp) \

View File

@ -893,7 +893,7 @@ __global__ void _doGatherColumnsOf(ElemType* us, size_t usStride, const ElemType
const ElemType& ra = a[ i + jIn * aStride ];
ElemType& rus = us[id/*i + jOut * usStride*/];
ElemType res = ra * alpha;
if (beta != 0)
res += rus * beta;

View File

@ -245,7 +245,7 @@ bool BinaryReader<ElemType>::CheckEndDataset(size_t actualmbsize)
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool BinaryReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BinaryReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// get out if they didn't call StartMinibatchLoop() first
if (m_mbSize == 0)

View File

@ -541,7 +541,7 @@ public:
};
template <class ElemType>
class BinaryReader : public IDataReader
class BinaryReader : public DataReaderBase
{
size_t m_mbSize; // size of minibatch requested
size_t m_mbStartSample; // starting sample # of the next minibatch
@ -587,7 +587,7 @@ public:
}
virtual ~BinaryReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
size_t GetNumParallelSequences()
{

View File

@ -140,7 +140,7 @@ void Indexer::Build()
size_t id = 0;
int64_t offset = GetFileOffset();
// read the very first sequence id
if (!GetNextSequenceId(id))
if (!TryGetSequenceId(id))
{
RuntimeError("Expected a sequence id at the offset %" PRIi64 ", none was found.", offset);
}
@ -156,7 +156,7 @@ void Indexer::Build()
offset = GetFileOffset(); // a new line starts at this offset;
sd.m_numberOfSamples++;
if (!m_done && GetNextSequenceId(id) && id != sd.m_id)
if (!m_done && TryGetSequenceId(id) && id != sd.m_id)
{
// found a new sequence, which starts at the [offset] bytes into the file
sd.m_byteSize = offset - sd.m_fileOffsetBytes;
@ -192,7 +192,7 @@ void Indexer::SkipLine()
}
}
bool Indexer::GetNextSequenceId(size_t& id)
bool Indexer::TryGetSequenceId(size_t& id)
{
bool found = false;
id = 0;

View File

@ -71,7 +71,7 @@ private:
// EOF is reached without hitting the pipe character.
// Returns false if no numerical characters are found preceding the pipe.
// Otherwise, writes sequence id value to the provided reference, returns true.
bool GetNextSequenceId(size_t& id);
bool TryGetSequenceId(size_t& id);
// Build a chunk/sequence index, treating each line as an individual sequence.
// Does not do any sequence parsing, instead uses line number as

View File

@ -332,7 +332,7 @@ void TextParser<ElemType>::IncrementNumberOfErrorsOrDie()
}
template <class ElemType>
bool TextParser<ElemType>::RefillBuffer()
bool TextParser<ElemType>::TryRefillBuffer()
{
size_t bytesRead = fread(m_buffer.get(), 1, BUFFER_SIZE, m_file);
@ -364,7 +364,7 @@ void TextParser<ElemType>::SetFileOffset(int64_t offset)
m_fileOffsetStart = offset;
m_fileOffsetEnd = offset;
RefillBuffer();
TryRefillBuffer();
}
template <class ElemType>
@ -384,7 +384,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
if (verifyId)
{
size_t id;
if (!ReadUint64(id, bytesToRead) || id != sequenceDsc.m_id)
if (!TryReadUint64(id, bytesToRead) || id != sequenceDsc.m_id)
{
RuntimeError("Did not find the expected sequence id ( %" PRIu64 ") "
" at the file offset = %" PRId64 "\n", sequenceDsc.m_id, GetFileOffset());
@ -410,7 +410,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
size_t numRowsRead = 0, expectedRowCount = sequenceDsc.m_numberOfSamples;
for (size_t i = 0; i < expectedRowCount; i++)
{
if ((ReadRow(sequence, bytesToRead)))
if ((TryReadRow(sequence, bytesToRead)))
{
++numRowsRead;
}
@ -472,7 +472,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
}
template <class ElemType>
bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadRow(SequenceBuffer& sequence, size_t& bytesToRead)
{
bool found = false;
while (bytesToRead && CanRead())
@ -496,7 +496,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
}
size_t id;
if (!GetInputId(id, bytesToRead))
if (!TryGetInputId(id, bytesToRead))
{
IncrementNumberOfErrorsOrDie();
SkipToNextInput(bytesToRead);
@ -511,7 +511,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
vector<ElemType>& values = data->m_buffer;
size_t size = values.size();
assert(size % stream.m_sampleDimension == 0);
if (!ReadDenseSample(values, stream.m_sampleDimension, bytesToRead))
if (!TryReadDenseSample(values, stream.m_sampleDimension, bytesToRead))
{
// expected a dense sample, but was not able to fully read it, ignore it.
if (values.size() != size)
@ -533,7 +533,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
vector<IndexType>& indices = data->m_indices;
assert(values.size() == indices.size());
size_t size = values.size();
if (!ReadSparseSample(values, indices, bytesToRead))
if (!TryReadSparseSample(values, indices, bytesToRead))
{
// expected a sparse sample, but something went south, ignore it.
if (values.size() != size)
@ -572,7 +572,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
}
template <class ElemType>
bool TextParser<ElemType>::GetInputId(size_t& id, size_t& bytesToRead)
bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
{
char* scratchIndex = m_scratch.get();
@ -664,7 +664,7 @@ bool TextParser<ElemType>::GetInputId(size_t& id, size_t& bytesToRead)
}
template <class ElemType>
bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead)
{
size_t counter = 0;
ElemType value;
@ -708,7 +708,7 @@ bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t samp
continue;
}
if (!ReadRealNumber(value, bytesToRead))
if (!TryReadRealNumber(value, bytesToRead))
{
// bail out.
return false;
@ -730,7 +730,7 @@ bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t samp
}
template <class ElemType>
bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead)
{
size_t index;
ElemType value;
@ -755,7 +755,7 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
}
// read next sparse index
if (!ReadUint64(index, bytesToRead))
if (!TryReadUint64(index, bytesToRead))
{
// bail out.
return false;
@ -771,6 +771,17 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
// bail out.
return false;
}
if (index > numeric_limits<IndexType>::max())
{
if (m_traceLevel >= Warning)
{
fprintf(stderr,
"WARNING: sparse index value(%" PRIu64 ") exceeds the maximum allowed "
" value (%" PRIu64 ")\n", index, (size_t)numeric_limits<IndexType>::max());
}
// bail out.
return false;
}
// an index must be followed by a delimiter
c = *m_pos;
@ -792,7 +803,7 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
}
// read the corresponding value
if (!ReadRealNumber(value, bytesToRead))
if (!TryReadRealNumber(value, bytesToRead))
{
// bail out.
return false;
@ -847,7 +858,7 @@ void TextParser<ElemType>::SkipToNextInput(size_t& bytesToRead)
}
template <class ElemType>
bool TextParser<ElemType>::ReadUint64(size_t& value, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadUint64(size_t& value, size_t& bytesToRead)
{
value = 0;
bool found = false;
@ -900,7 +911,7 @@ bool TextParser<ElemType>::ReadUint64(size_t& value, size_t& bytesToRead)
// cannot be parsed as part of a floating point number.
// Returns true if parsing was successful.
template <class ElemType>
bool TextParser<ElemType>::ReadRealNumber(ElemType& value, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadRealNumber(ElemType& value, size_t& bytesToRead)
{
State state = State::Init;
double coefficient = .0, number = .0, divider = .0;

View File

@ -124,28 +124,28 @@ private:
void SkipToNextValue(size_t& bytesToRead);
void SkipToNextInput(size_t& bytesToRead);
bool RefillBuffer();
bool TryRefillBuffer();
int64_t GetFileOffset() const { return m_fileOffsetStart + (m_pos - m_bufferStart); }
// Reads an alias/name and converts it to an internal stream id (= stream index).
bool GetInputId(size_t& id, size_t& bytesToRead);
bool TryGetInputId(size_t& id, size_t& bytesToRead);
bool ReadRealNumber(ElemType& value, size_t& bytesToRead);
bool TryReadRealNumber(ElemType& value, size_t& bytesToRead);
bool ReadUint64(size_t& value, size_t& bytesToRead);
bool TryReadUint64(size_t& value, size_t& bytesToRead);
// Reads dense sample values into the provided vector.
bool ReadDenseSample(std::vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead);
bool TryReadDenseSample(std::vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead);
// Reads sparse sample values and corresponding indices into the provided vectors.
bool ReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead);
bool TryReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead);
// Reads one whole row (terminated by a row delimiter) of samples
bool ReadRow(SequenceBuffer& sequence, size_t& bytesToRead);
bool TryReadRow(SequenceBuffer& sequence, size_t& bytesToRead);
// Returns true if there's still data available.
bool inline CanRead() { return m_pos != m_bufferEnd || RefillBuffer(); }
bool inline CanRead() { return m_pos != m_bufferEnd || TryRefillBuffer(); }
// Given a descriptor, retrieves the data for the corresponding sequence from the file.
SequenceBuffer LoadSequence(bool verifyId, const SequenceDescriptor& descriptor);

View File

@ -320,7 +320,7 @@ void DSSMReader<ElemType>::StoreLabel(ElemType& labelStore, const LabelType& lab
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool DSSMReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool DSSMReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
if (m_readNextSample >= m_totalSamples)
{

View File

@ -64,7 +64,7 @@ public:
};
template <class ElemType>
class DSSMReader : public IDataReader
class DSSMReader : public DataReaderBase
{
// public:
// typedef std::string LabelType;
@ -159,7 +159,7 @@ public:
}
virtual ~DSSMReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
size_t GetNumParallelSequences()
{

View File

@ -931,7 +931,7 @@ bool HTKMLFReader<ElemType>::GetHmmData(msra::asr::simplesenonehmm* hmm)
// returns - true if there are more minibatches, false if no more minibatches remain
// TODO: Why do we have two read functions? Is one not a superset of the other?
template <class ElemType>
bool HTKMLFReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool HTKMLFReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
if (m_trainOrTest)
{

View File

@ -21,7 +21,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class HTKMLFReader : public IDataReader
class HTKMLFReader : public DataReaderBase
{
private:
const static size_t m_htkRandomizeAuto = 0;
@ -184,7 +184,7 @@ public:
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0);

View File

@ -64,11 +64,15 @@
<PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(ReleaseBuild)">
@ -82,6 +86,8 @@
<SDLCheck>true</SDLCheck>
<AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@ -90,6 +96,8 @@
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<Profile>true</Profile>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
@ -115,6 +123,7 @@
<ClInclude Include="utterancesourcemulti.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\Common\DataReader.cpp" />
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="..\..\Common\TimerUtility.cpp">
<PrecompiledHeader>NotUsing</PrecompiledHeader>

View File

@ -15,6 +15,9 @@
<ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="Exports.cpp" />
<ClCompile Include="DataWriterLocal.cpp" />
<ClCompile Include="..\..\Common\DataReader.cpp">
<Filter>Common\Include</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="biggrowablevectors.h" />

View File

@ -846,7 +846,7 @@ void HTKMLFReader<ElemType>::StartMinibatchLoopToWrite(size_t mbSize, size_t /*e
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool HTKMLFReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool HTKMLFReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
if (m_trainOrTest)
{

View File

@ -13,7 +13,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class HTKMLFReader : public IDataReader
class HTKMLFReader : public DataReaderBase
{
private:
msra::dbn::minibatchiterator* m_mbiter;
@ -186,7 +186,7 @@ public:
}
virtual ~HTKMLFReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0);

View File

@ -1138,7 +1138,7 @@ void SequenceReader<ElemType>::GetClassInfo()
}
template <class ElemType>
bool SequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool SequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
FailBecauseDeprecated(__FUNCTION__); // DEPRECATED CLASS, SHOULD NOT BE USED ANYMORE
@ -1889,7 +1889,7 @@ bool BatchSequenceReader<ElemType>::GetMinibatchData(size_t& /*out*/ firstPosInS
// - up to N sequences of the same length are returned in each MB
// - minibatches consist of sequences of the same length only (no gaps)
template <class ElemType>
bool BatchSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BatchSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// get out if they didn't call StartMinibatchLoop() first
// TODO: Why not fail here?
@ -2023,7 +2023,7 @@ bool BatchSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices
timePos: the time position. For example, for a 100-column actual minibatch with 10 streams,
timePosition = [0,..,9] for each actual time step
*/
// This function was only called from BatchSequenceReader::GetMinibatch(), but no longer.
// This function was only called from BatchSequenceReader::TryGetMinibatch(), but no longer.
template <class ElemType>
void BatchSequenceReader<ElemType>::SetSentenceBegin(int wrd, int uttPos, int timePos)
{

View File

@ -109,7 +109,7 @@ public:
// Note: This class is deprecated for standalone use, only used as a base for BatchSequenceReader which overrides most of the functions.
template <class ElemType>
class SequenceReader : public IDataReader
class SequenceReader : public DataReaderBase
{
protected:
bool m_idx2clsRead;
@ -276,7 +276,7 @@ public:
}
virtual ~SequenceReader();
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
// void SetSentenceSegBatch(std::vector<size_t> &/*sentenceEnd*/) {};
// TODO: ^^ should this be void CopyMBLayoutTo(MBLayoutPtr pMBLayout);
@ -407,7 +407,7 @@ private:
public:
void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize) override;
bool GetMinibatch(StreamMinibatchInputs& matrices) override;
bool TryGetMinibatch(StreamMinibatchInputs& matrices) override;
bool DataEnd() override;
void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { assert(mToProcess.size() == m_pMBLayout->GetNumParallelSequences()); pMBLayout->CopyFrom(m_pMBLayout); }

View File

@ -817,7 +817,7 @@ void BatchLUSequenceReader<ElemType>::SetNumParallelSequences(const size_t mz)
}
template <class ElemType>
bool BatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BatchLUSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// get out if they didn't call StartMinibatchLoop() first
// TODO: Why is this allowed? Why not terminate?
@ -881,12 +881,12 @@ bool BatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matric
{
assert(idx == (LabelIdType) NULLLABEL); // TODO: what other conditions?
// if (!m_pMBLayout->IsGap(s, t)) // verify that these are marked as NoInput
// LogicError("BatchLUSequenceReader::GetMinibatch observation is larger than its dimension but no_labels sign is not used to indicate that this observation has no labels. Possible reason is a bug in EnsureDataAvailable or a bug here.");
// LogicError("BatchLUSequenceReader::TryGetMinibatch observation is larger than its dimension but no_labels sign is not used to indicate that this observation has no labels. Possible reason is a bug in EnsureDataAvailable or a bug here.");
continue;
}
// if (m_pMBLayout->IsGap(s, t)) // verify that these are marked as NoInput
// LogicError("BatchLUSequenceReader::GetMinibatch: Inconsistent NoInput flag");
// LogicError("BatchLUSequenceReader::TryGetMinibatch: Inconsistent NoInput flag");
locObs.SetValue(idx + jj * featInfo.dim, j, (ElemType) 1);
}
@ -1171,7 +1171,7 @@ template class BatchLUSequenceReader<double>;
template class BatchLUSequenceReader<float>;
template <class ElemType>
bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool MultiIOBatchLUSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// on first iteration, need to check if all requested data matrices are available
std::map<std::wstring, size_t>::iterator iter;

View File

@ -47,7 +47,7 @@ enum ReaderMode
};
template <class ElemType>
class LUSequenceReader : public IDataReader
class LUSequenceReader : public DataReaderBase
{
protected:
bool m_idx2clsRead;
@ -319,7 +319,7 @@ public:
size_t GetLabelOutput(StreamMinibatchInputs& matrices, LabelInfo& labelInfo, size_t actualmbsize);
void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
bool GetMinibatch(StreamMinibatchInputs& matrices);
bool TryGetMinibatch(StreamMinibatchInputs& matrices);
bool EnsureDataAvailable(size_t mbStartSample);
size_t GetNumParallelSequences();
@ -411,7 +411,7 @@ public:
}
};
bool GetMinibatch(StreamMinibatchInputs& matrices);
bool TryGetMinibatch(StreamMinibatchInputs& matrices);
void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples);

View File

@ -780,7 +780,7 @@ void LibSVMBinaryReader<ElemType>::DoDSSMMatrix(Matrix<ElemType>& mat, size_t ac
}
template <class ElemType>
bool LibSVMBinaryReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool LibSVMBinaryReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
//timer = clock();
#if DEBUG

View File

@ -226,7 +226,7 @@ private:
};
template <class ElemType>
class LibSVMBinaryReader : public IDataReader
class LibSVMBinaryReader : public DataReaderBase
{
public:
virtual void Init(const ConfigParameters& config) override
@ -254,7 +254,7 @@ public:
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples) override;
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
virtual bool SupportsDistributedMBRead() const override
{

View File

@ -158,9 +158,10 @@ Minibatch BpttPacker::ReadMinibatch()
for (size_t streamIndex = 0; streamIndex < m_outputStreamDescriptions.size(); ++streamIndex)
{
m_currentLayouts[streamIndex]->Init(m_numParallelSequences, m_truncationSize);
size_t sequenceId = 0;
for (size_t slotIndex = 0; slotIndex < m_numParallelSequences; ++slotIndex)
{
PackSlot(streamIndex, slotIndex);
PackSlot(streamIndex, slotIndex, sequenceId);
}
StreamMinibatchPtr m = make_shared<StreamMinibatch>();
@ -173,7 +174,7 @@ Minibatch BpttPacker::ReadMinibatch()
}
// Packs a slot of sequences into the minibatch.
void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)
void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId)
{
auto& slot = m_sequenceBufferPerStream[streamIndex]->m_slots[slotIndex];
@ -204,7 +205,7 @@ void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)
// Add current sequence to the minibatch layout.
m_currentLayouts[streamIndex]->AddSequence(
NEW_SEQUENCE_ID,
sequenceId++,
slotIndex,
-(int)slot.m_sampleCursor,
slot.FrontSequence()->m_numberOfSamples - slot.m_sampleCursor);
@ -220,7 +221,7 @@ void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)
// Adding the next sequence to the minibatch.
m_currentLayouts[streamIndex]->AddSequence(
NEW_SEQUENCE_ID,
sequenceId++,
slotIndex,
currentTimestep,
currentTimestep + slot.FrontSequence()->m_numberOfSamples);

View File

@ -36,7 +36,11 @@ private:
void ReadSequencesToSlot(size_t slotIndex);
// Packs a slot into the data buffer.
void PackSlot(size_t streamIndex, size_t slotIndex);
// SequenceId specifies the starting value to be used as sequence identifier.
// For each new input, sequence id is reset to 0, and incremented each time
// a sequence is added to the layout. This allows layouts corresponding to different
// inputs to have consistent sequence ids.
void PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId);
virtual MBLayoutPtr CreateMBLayout(const StreamBatch& batch)
{

View File

@ -34,9 +34,9 @@ protected:
};
PackerBase(MemoryProviderPtr memoryProvider,
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams);
TransformerPtr transformer,
size_t minibatchSize,
const std::vector<StreamDescriptionPtr>& streams);
typedef std::vector<SequenceDataPtr> StreamBatch;

View File

@ -22,9 +22,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
: m_layout(make_shared<MBLayout>()), m_factory(factory)
: m_factory(factory)
{
m_layout->SetUniqueAxisName(L"ReaderShim");
}
template <class ElemType>
@ -38,8 +37,7 @@ void ReaderShim<ElemType>::Init(const ConfigParameters& config)
// otherwise deferring - synchronous execution during .get() call
m_launchType = prefetch ? launch::async : launch::deferred;
auto numSeqsPerMBForAllEpochs = numberOfuttsPerMinibatchForAllEpochs;
m_layout->Init(numSeqsPerMBForAllEpochs[0], 0);
m_numParallelSequences = numberOfuttsPerMinibatchForAllEpochs[0];
m_reader = m_factory(config);
m_streams = m_reader->GetStreamDescriptions();
@ -105,7 +103,6 @@ string EnumerateInputs(const map<wstring, size_t> &nameToStreamId)
template <class ElemType>
bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
{
// TODO: verify that the set of matrix names is identical
// to the set of reader input names. Warn if it's a subset, throw
// if it's a superset.
@ -133,6 +130,15 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
}
}
// Reset stale mb layouts.
// BUGBUG: This seems incorrect. (1) layouts should all be updated below, and (2) some of these layouts are the same, so we are resetting them twice.
for (const auto& iter : matrices)
{
iter.second.pMBLayout->Init(1, 0);
}
// a map to generate error messages when checking layout constraints.
map<wstring, wstring> layoutToInputMap;
if (!minibatch.m_data.empty())
{
// TODO: Use alternating pinned buffer in the packer, do not copy anything, but pack into the pinned memory.
@ -147,9 +153,31 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
}
size_t streamId = m_nameToStreamId[mx.first];
const auto& stream = minibatch.m_data[streamId];
m_layout = stream->m_layout;
m_numParallelSequences = stream->m_layout->GetNumParallelSequences();
// This assert no longer holds - different inputs have different sequence lengths, resulting in a different number
// of parallel samples.
// assert(m_numParallelSequences == minibatch.m_data.front()->m_layout->GetNumParallelSequences());
auto& layout = mx.second.pMBLayout;
if (layout->GetNumCols() == 0)
{
// layout is empty, copy layout info from the reader
layout->CopyFrom(stream->m_layout, /*keepName*/ true);
layoutToInputMap[layout->GetAxisName()] = mx.first;
}
else if (*layout != *stream->m_layout) // this does a deep value-level comparison
{
RuntimeError("Dynamic axis layout '%ls' is shared between inputs '%ls' and '%ls', but layouts generated "
"from the input data are incompatible on this axis. Are you using different sequence lengths? "
"Did you consider adding a DynamicAxis() to the Input nodes?",
layout->GetAxisName(), layoutToInputMap[layout->GetAxisName()].c_str(), mx.first.c_str());
}
size_t sampleSize = m_streams[streamId]->m_sampleLayout->GetNumElements();
auto& matrix = matrices.GetInputMatrix<ElemType>(mx.first);
FillMatrixFromStream(m_streams[streamId]->m_storageType, &matrix, sampleSize, stream);
@ -200,13 +228,21 @@ bool ReaderShim<ElemType>::DataEnd() { return false; } // Note: Return value nev
template <class ElemType>
void ReaderShim<ElemType>::CopyMBLayoutTo(MBLayoutPtr layout)
{
layout->CopyFrom(m_layout);
// This method is inherited from IDataReader and should be removed in the near future.
NOT_IMPLEMENTED;
}
template <class ElemType>
size_t ReaderShim<ElemType>::GetNumParallelSequences()
{
return m_layout->GetNumParallelSequences();
// BUGBUG This is a property of the stream, of which this reader might produce several, with different nr. of
// parallel sequences. Thus this property doesn't make sense anymore.
// This method is called by
// * DataReaderHelpers::GetNumSubminibatchesNeeded to estimate mb size
// * ComputationNetwork::SetBatchNormalizationTimeConstants to compute learning rate per sample
// * ComputationNetwork::SetBatchNormalizationTimeConstants to compute actual mb size and momentum per sample
// * SGD::AdaptiveMinibatchSizing to compute learning rate per sample
return m_numParallelSequences;
}
template class ReaderShim<float>;
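A hedged walk-through of the per-input layout handling added above (sequence lengths hypothetical):

// Two inputs declared on the same dynamic axis "t":
//   features -> stream layout: 4 sequences of lengths {7, 7, 7, 7}
//   labels   -> stream layout: 4 sequences of lengths {7, 7, 7, 7}
// The first input encountered copies its stream layout into the shared axis
// object (CopyFrom, keepName=true); the second is compared deeply and passes.
// Had 'labels' produced lengths {1, 1, 1, 1} on that same axis, the deep
// comparison would fail and the RuntimeError above fires, suggesting that one
// of the inputs be given its own DynamicAxis().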

View File

@ -58,7 +58,7 @@ private:
ReaderFactory m_factory;
bool m_endOfEpoch;
MBLayoutPtr m_layout;
size_t m_numParallelSequences;
std::map<std::wstring, size_t> m_nameToStreamId;
std::vector<StreamDescriptionPtr> m_streams;

View File

@ -207,7 +207,7 @@ void SparsePCReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t /*epoch*
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatchs remain
template <class ElemType>
bool SparsePCReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool SparsePCReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
// get out if they didn't call StartMinibatchLoop() first
if (m_miniBatchSize == 0)

View File

@ -21,7 +21,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
class SparsePCReader : public IDataReader
class SparsePCReader : public DataReaderBase
{
ConfigParameters m_readerConfig;
std::wstring m_file;
@ -76,7 +76,7 @@ public:
InitFromConfig(config);
}
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
size_t GetNumParallelSequences()
{

View File

@ -765,7 +765,7 @@ void UCIFastReader<ElemType>::StoreLabel(ElemType& labelStore, const LabelType&
// [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool UCIFastReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool UCIFastReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
bool minibatchesRemaining = true;
if (m_pendingAsyncGetMinibatch.valid())

View File

@ -36,7 +36,7 @@ enum LabelKind
};
template <class ElemType>
class UCIFastReader : public IDataReader
class UCIFastReader : public DataReaderBase
{
shared_ptr<UCIParser<ElemType, LabelType>> m_parser;
size_t m_mbSize; // size of minibatch requested
@ -151,7 +151,7 @@ public:
virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;
virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
bool GetMinibatchImpl(StreamMinibatchInputs& matrices);

View File

@ -34,9 +34,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
const MPIWrapperPtr& mpi)
{
// Reading consists of a sequence of Reader API calls:
// - GetMinibatch() --fills the inputMatrices
// - GetMinibatch() --fills the inputMatrices and copies the MBLayout from Reader into inputMatrices
// - SetActualMiniBatchSizeFromFeatures() --tells Network to resize the nodes' buffers
// - CopyMBLayoutTo() --copies the MBLayout from Reader to Network
// with the special twist that in presence of parallelization, there is some decimation involved.
bool wasDataRead = trainSetDataReader.GetMinibatch(inputMatrices); // fill in the minibatch data into the Input nodes' buffers directly
@ -61,13 +60,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
trainSetDataReader.GetMinibatch4SE(*latticeinput, *uids, *boundaries, *extrauttmap);
}
// get layout meta-data
// BUGBUG (Issue #95): must be adapted for multiple MBLayouts
trainSetDataReader.CopyMBLayoutTo(net->GetMBLayoutPtrOfNetwork());
// TODO: move this into shim for the old readers.
// decimate if needed. Decimation happens in-place.
// This is only allowed for old readers, which support a single layout for all inputs.
if (!useDistributedMBReading && useParallelTrain)
DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), net->GetMBLayoutPtrOfNetwork());
{
auto& pMBLayout = net->GetMBLayoutPtrOfNetwork();
// Verify that there's indeed a single layout
for (const auto& iter : inputMatrices)
{
assert(iter.second.pMBLayout == pMBLayout);
// TODO: This must be a runtime check, not an assert().
UNUSED(iter);
}
DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), pMBLayout);
}
// reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
// TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
@ -139,7 +148,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// decimatedMB[name]->SetValue(mat.Reshaped(nRows*nSequence, nT).RowSlice( st*nRows , (en-st)*nRows).Reshaped(nRows, nNewParallelSequence*nT));
}
// decimate MBLayout as well
pDecimateMBLayout = make_shared<MBLayout>(numNewParallelSequence, nT);
pDecimateMBLayout = make_shared<MBLayout>(numNewParallelSequence, nT, L"");
pDecimateMBLayout->SetAxisName(pMBLayout->GetAxisName());
#if 1
// now copy over all sequence info records that are inside the range, with adjusted 's'

View File

@ -0,0 +1,129 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.
RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
command=Train #:Write
deviceId = $DeviceId$
modelPath="$ModelDir$/seqcla.dnn"
Train=[
action="train"
run=BrainScriptNetworkBuilder
BrainScriptNetworkBuilder=[
Macros = [
// define "last hidden state of sequence" in the LSTM (really for any sequence though)
TakeRight (N, x) = BS.Sequences._Take(FutureValue, N, x)
Last(x) = TakeRight(1, x)
]
Layers = [
EmbeddingLayer(input, vocabSize, embeddingDim, embeddingPath) = [
embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
lookup = GatherPacked(input, embedding)
].lookup
DenseLayer(input, inputSize, outputSize, activation) = [
z = BFF(input, outputSize, inputSize).z
act = activation(z)
].act
LSTMLayer(input, inputSize, outputSize, cellSize, selector) = [
lstm = BS.RNNs.RecurrentLSTMP(inputSize, outputSize, cellSize, input)
result = selector(lstm)
].result
]
// LSTM params
lstmDim = 25
cellDim = 25
// model
numLabels = 5
vocab = 2000
embedDim = 50
// set up features and labels
t = DynamicAxis()
features = Input(1, dynamicAxis=t) # Input has shape (1,t)
labels = Input(numLabels) # Input has shape (numLabels,*), where all sequences have length 1 along *
// load the pre-learned word embedding matrix
l1 = Layers.EmbeddingLayer(features, vocab, embedDim, 'embeddingmatrix.txt')
l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, Macros.Last)
l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
out = Pass(l3, tag='output')
// Make sure the trainer understands that the time dimension of l3 is actually the same as that of labels.
l3p = ReconcileDynamicAxis(l3, labels)
// training criteria
ce = CrossEntropyWithSoftmax(labels, l3p, tag='criterion') // this is the training objective
wer = ErrorPrediction (labels, l3p, tag='evaluation') // this also gets tracked
]
SGD = [
epochSize = 0
minibatchSize = 200
maxEpochs = 5
momentumPerMB = 0.9
learningRatesPerMB = 0.1
]
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train.txt"
input = [
features=[
alias = "x"
dim = 1
format = "dense"
]
labels=[
alias = "y"
dim = 5
format = "dense"
]
]
]
outputPath = "$OutputDir$/output.txt" # dump the output as text?
]
Write=[
action="test"
run=BrainScriptNetworkBuilder
format = [
# %n = minibatch, %x = shape, %d = sequenceId
sequencePrologue=%d\t|w.shape %x\n%d\t|w\s
sampleSeparator=\n%d\t|w\s
elementSeparator=\s
]
modelFile = "$ModelDir$/seqcla.dnn"
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train.txt"
input = [
features=[
alias = "x"
dim = 1
format = "dense"
]
labels=[
alias = "y"
dim = 5
format = "dense"
]
]
]
outputPath = "$OutputDir$/output.txt" # dump the output as text?
]
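For context, CNTKTextFormatReader expects one sample per line, each prefixed with a sequence id and |alias markers. A hypothetical Train.txt fragment matching the declarations above (the real data file is not part of this commit):

0 |x 23 |y 0 1 0 0 0
0 |x 187
0 |x 9
1 |x 456 |y 0 0 0 1 0
1 |x 12

Rows of one sequence share an id; x carries the one-dimensional word index per time step, and the five-dimensional one-hot label y appears once per sequence.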

Diff not shown because of its large size. Load Diff

Diff not shown because of its large size. Load Diff

Diff not shown because of its large size. Load Diff

Diff not shown because of its large size. Load Diff

Diff not shown because of its large size. Load Diff

Diff not shown because of its large size. Load Diff

View File

@ -0,0 +1,16 @@
#!/bin/bash
. $TEST_ROOT_DIR/run-test-common
ConfigDir=$TEST_DIR/Config
# cntkrun <CNTK config file name> <additional CNTK args>
DeleteModelsAfterTest=0
cntkrun seqcla.cntk || exit $?
echo === Deleting last epoch data
rm $TEST_RUN_DIR/Models/*.dnn
echo ==== Re-running from checkpoint
DeleteExistingModels=0
DeleteModelsAfterTest=1
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun seqcla.cntk 'makeMode=true' || exit $?

View File

@ -0,0 +1,33 @@
dataDir: ./Data
tags:
# running on every BVT job in 'S' (Speech) leg in Debug-GPU and Release-CPU configurations:
- bvt-s (build_sku == 'gpu') and ((flavor=='debug') ^ (device=='cpu'))
# running unconditionally on every Nightly job in 'S' leg
- nightly-s (build_sku == 'gpu')
testCases:
CNTK Run must be completed:
patterns:
- __COMPLETED__
Must train epochs in exactly the same order and with the same parameters:
patterns:
- Starting Epoch {{integer}}
- learning rate per sample = {{float}}
- momentum = {{float}}
Epochs must be finished with expected results:
patterns:
- Finished Epoch[{{integer}} of {{integer}}]
- TrainLossPerSample = {{float,tolerance=.5%}}
# TODO: On GPU, the initial EvalErr rate is larger than on CPU and in other configurations.
# In later epochs the results are aligned. Why?
- EvalErrPerSample = {{float,tolerance=13%}}
- AvgLearningRatePerSample = {{float,tolerance=0.001%}}
# Per-minibatch training results must match:
# patterns:
# - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
# - SamplesSeen = {{integer}}
# - TrainLossPerSample = {{float,tolerance=.5%}}
# - EvalErr[0]PerSample = {{float,tolerance=.5%}}

View File

@ -110,7 +110,6 @@
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptEvaluator.cpp" />
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptParser.cpp" />
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptTest.cpp" />
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\ExperimentalNetworkBuilder.cpp" />
<ClCompile Include="..\..\..\Source\Common\Config.cpp" />
<ClCompile Include="..\..\..\Source\Common\DataReader.cpp" />
<ClCompile Include="..\..\..\Source\Common\DataWriter.cpp" />

View File

@ -34,9 +34,6 @@
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptTest.cpp">
<Filter>From BrainScript</Filter>
</ClCompile>
<ClCompile Include="..\..\..\Source\CNTK\BrainScript\ExperimentalNetworkBuilder.cpp">
<Filter>From BrainScript</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<Filter Include="Config">

View File

@ -189,20 +189,20 @@ struct ReaderFixture
for (auto cnt = 0; dataReader.GetMinibatch(map) && cnt < m_maxMiniBatchCount; cnt++)
{
MBLayoutPtr pMBlayoutPtr = make_shared<MBLayout>();
dataReader.CopyMBLayoutTo(pMBlayoutPtr);
// Process the Feature Matri(x|ces)
for (auto i = 0; i < numFeatureFiles; i++)
{
wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features";
OutputMatrix(map.GetInputMatrix<ElemType>(name), *pMBlayoutPtr, outputFile);
auto& layoutPtr = map.GetInput(name).pMBLayout;
OutputMatrix(map.GetInputMatrix<ElemType>(name), *layoutPtr, outputFile);
}
// Process the Label Matri(x|ces)
for (auto i = 0; i < numLabelFiles; i++)
{
wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels";
OutputMatrix(map.GetInputMatrix<ElemType>(name), *pMBlayoutPtr, outputFile);
auto& layoutPtr = map.GetInput(name).pMBLayout;
OutputMatrix(map.GetInputMatrix<ElemType>(name), *layoutPtr, outputFile);
}
}
}
@ -255,7 +255,10 @@ struct ReaderFixture
std::vector<shared_ptr<Matrix<ElemType>>> features;
std::vector<shared_ptr<Matrix<ElemType>>> labels;
MBLayoutPtr pMBLayout = make_shared<MBLayout>();
// For the time being, use the same layout across all inputs.
// TODO: add an option to create per-input layouts (once we have test-cases with different layouts)
MBLayoutPtr pMBLayout = make_shared<MBLayout>(1, 0, L"X");
for (auto i = 0; i < numFeatureFiles; i++)
{
features.push_back(make_shared<Matrix<ElemType>>(0));