merged from master and resolved conflicts

Commit 4c9f91868e

@@ -150,6 +150,10 @@ GeneratedArtifacts/
 _Pvt_Extensions/
 ModelManifest.xml
 
+# Python
+*.pyc
+__pycache__/
+
 # =========================
 # Windows detritus
 # =========================

CNTK.sln

@@ -913,6 +913,58 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NetworkTests", "Tests\UnitT
         {EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
     EndProjectSection
 EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Text", "Text", "{8656B71D-E24C-4AC2-8BE4-C07B415A3E15}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SequenceClassification", "SequenceClassification", "{E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Miscellaneous", "Miscellaneous", "{8629430A-821E-43BA-AEC5-8B2CF31A2A7A}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "CIFAR-10", "CIFAR-10", "{0141526B-F257-4574-8CBE-99634726FFCE}"
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "01_Convolution", "01_Convolution", "{58286327-6742-44C4-A34E-D2583419E55E}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.cpu.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.linux.gpu.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\baseline.windows.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\run-test
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\01_Convolution\testcases.yml
+    EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "02_BatchNormConv", "02_BatchNormConv", "{AB9207B9-B134-4C57-B7ED-F3DCF7B0DC5F}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.linux.gpu.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\baseline.windows.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\run-test
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\02_BatchNormConv\testcases.yml
+    EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "03_ResNet", "03_ResNet", "{12FB912C-43F8-40FE-BD7F-B52F589A1EBC}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.linux.gpu.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\baseline.windows.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\run-test
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\03_ResNet\testcases.yml
+    EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "04_ResNet", "04_ResNet", "{2BFE4D88-6F32-4701-887A-1DE3D7626DBB}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.linux.gpu.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.linux.gpu.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.windows.txt = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\baseline.windows.txt
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\run-test = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\run-test
+        Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml = Tests\EndToEndTests\Examples\Image\Miscellaneous\CIFAR-10\04_ResNet_56\testcases.yml
+    EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Config", "Config", "{EC780385-7580-4D15-914B-1D878A295CBC}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk = Tests\EndToEndTests\Text\SequenceClassification\Config\seqcla.cntk
+    EndProjectSection
+EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA}"
+    ProjectSection(SolutionItems) = preProject
+        Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt
+        Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
+    EndProjectSection
+EndProject
 Global
     GlobalSection(SolutionConfigurationPlatforms) = preSolution
         Debug_CpuOnly|x64 = Debug_CpuOnly|x64

@@ -1254,5 +1306,15 @@ Global
     {48C2A9DE-FB2C-4724-9ADC-744216D79BCF} = {08A05A9A-4E45-42D5-83FA-719E99C04A30}
     {2B1046A1-0140-43B7-B3DC-CF7DEEE1009E} = {8071EF60-30F7-4A77-81AA-ADCA0E18B1E3}
     {CDA96AA3-3252-4978-A0BF-2ACD670823CB} = {6F19321A-65E7-4829-B00C-3886CD6C6EDE}
+    {8656B71D-E24C-4AC2-8BE4-C07B415A3E15} = {6E565B48-1923-49CE-9787-9BBB9D96F4C5}
+    {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1} = {8656B71D-E24C-4AC2-8BE4-C07B415A3E15}
+    {8629430A-821E-43BA-AEC5-8B2CF31A2A7A} = {FC7E7EC7-6E6A-4518-81C6-DA60451C657A}
+    {0141526B-F257-4574-8CBE-99634726FFCE} = {8629430A-821E-43BA-AEC5-8B2CF31A2A7A}
+    {58286327-6742-44C4-A34E-D2583419E55E} = {0141526B-F257-4574-8CBE-99634726FFCE}
+    {AB9207B9-B134-4C57-B7ED-F3DCF7B0DC5F} = {0141526B-F257-4574-8CBE-99634726FFCE}
+    {12FB912C-43F8-40FE-BD7F-B52F589A1EBC} = {0141526B-F257-4574-8CBE-99634726FFCE}
+    {2BFE4D88-6F32-4701-887A-1DE3D7626DBB} = {0141526B-F257-4574-8CBE-99634726FFCE}
+    {EC780385-7580-4D15-914B-1D878A295CBC} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
+    {D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA} = {E53E63A0-FAA9-4416-9AD1-08A8FB87FEE1}
 EndGlobalSection
 EndGlobal

Makefile

@@ -586,7 +586,6 @@ CNTK_SRC =\
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptTest.cpp \
-	$(SOURCEDIR)/CNTK/BrainScript/ExperimentalNetworkBuilder.cpp \
 	$(SOURCEDIR)/Common/BestGpu.cpp \
 	$(SOURCEDIR)/Common/MPIWrapper.cpp \
 

@@ -209,7 +209,8 @@ template <typename ElemType>
 void DoWriteOutput(const ConfigParameters& config)
 {
     ConfigParameters readerConfig(config(L"reader"));
-    readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
+    // Why?
+    //readerConfig.Insert("traceLevel", config(L"traceLevel", "0"));
     readerConfig.Insert("randomize", "None"); // we don't want randomization when output results
 
     DataReader testDataReader(readerConfig);

@@ -73,14 +73,16 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
             size_t i = 0;
             auto tensorShape = ProcessTensorShapeParameters(node, params, i, /*isImage=*/false, cnNodeType);
 
+            wstring dynamicAxis = node->GetOptionalParameter("dynamicAxis", "");
+            // TODO: Map dynamicAxis from name to node at this point, where that node is memoized inside NDL.
             // first look for this node already existing in the network
             // BUGBUG: How does this set the dimensions then?
             if (m_net->NodeNameExists(name))
                 nodePtr = dynamic_pointer_cast<ComputationNode<ElemType>>(m_net->GetNodeFromName(name));
             else if (isSparse)
-                nodePtr = builder.CreateSparseInputNode(name, tensorShape);
+                nodePtr = builder.CreateSparseInputNode(name, tensorShape, dynamicAxis);
             else
-                nodePtr = builder.CreateInputNode(name, tensorShape);
+                nodePtr = builder.CreateInputNode(name, tensorShape, dynamicAxis);
         }
     }
     else if (cnNodeType == L"ImageInput" || cnNodeType == L"SparseImageInput")

@@ -97,11 +99,12 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
             size_t imageHeight = ((NDLNode<ElemType>*) params[1])->GetScalar();
             size_t imageChannels = ((NDLNode<ElemType>*) params[2])->GetScalar();
             ImageLayoutKind imageLayoutKind = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "HWC"));
+            wstring dynamicAxis = node->GetOptionalParameter("dynamicAxis", "");
 
             if (isSparse)
-                nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
+                nodePtr = builder.CreateSparseInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), dynamicAxis);
             else
-                nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind));
+                nodePtr = builder.CreateInputNode(name, ImageDimensions::AsTensorShape(imageWidth, imageHeight, imageChannels, imageLayoutKind), dynamicAxis);
         }
     }
     else if (OperationNameOf(LearnableParameter) == cnNodeType || cnNodeType == L"ImageParameter")

@@ -34,10 +34,12 @@ Parameter = LearnableParameter // deprecated
 # TODO: make Parameter take tensor dims?
 ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
 ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, init = 'fromLiteral', initFromLiteral = literal, learningRateMultiplier = 0.0)
-Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
-SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
-ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
-SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
+DynamicAxis(tag='') = new ComputationNode [ operation = 'DynamicAxis' ; /*plus the function args*/ ]
+Input(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
+# TODO: change from dynamicAxis by name to dynamicAxis being an actual object
+SparseInput(dims, dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]
+ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]
+SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', dynamicAxis='', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]
 EnvironmentInput(propertyName, tag='') = new ComputationNode [ operation = 'EnvironmentInput' /*plus the function args*/ ]
 ConstantTensor(val, dims, tag='') = ParameterTensor(dims, learningRateMultiplier = 0, init = 'fixedValue', value = val)
 Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, learningRateMultiplier = 0, init = 'fixedValue', value = val)

@@ -79,8 +81,9 @@ Transpose(x) = TransposeDimensions(x, 1, 2)
 Times(A, B, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( A : B ) /*plus the function args*/ ]
 Logistic(label, probability, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability) /*plus the function args*/ ]
 WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability : instanceWeight) /*plus the function args*/ ]
-ReconcileMBLayout(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileMBLayout' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
-CastAs (type, data) = ReconcileMBLayout (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
+ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileDynamicAxis' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
+ReconcileMBLayout = ReconcileDynamicAxis # back compat
+CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
 Convolution(weightNode, inputValueNode, kernelDims, mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
 Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
 MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]

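The hunk above is the user-visible surface of this change: Input, SparseInput, ImageInput, and SparseImageInput gain a dynamicAxis parameter, a DynamicAxis() node type is introduced, and ReconcileMBLayout is renamed to ReconcileDynamicAxis with a back-compat alias. The following is a hypothetical usage sketch, not part of this commit, written as a C++ wide-string literal in the same style the ExperimentalNetworkBuilder.cpp deleted below used for embedded BrainScript; the names a1, features, labels, and labelsOnA1 are illustrative.

    // hypothetical sketch: declaring a named dynamic axis and attaching inputs to it
    const wchar_t* exampleNetwork =
        L"a1 = DynamicAxis() \n"                      // axis node, referenced by name below
        L"features = Input(128, dynamicAxis='a1') \n" // 'features' lives on axis a1
        L"labels = Input(10) \n"                      // no dynamicAxis: legacy shared axis
        L"labelsOnA1 = ReconcileDynamicAxis(labels, features) \n"; // move onto a1 before combining
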
@@ -1,134 +0,0 @@
-#if 0 // this entire file can be removed once CNTK.core.bs works
-// ExperimentalNetworkBuilder.cpp -- interface to new version of NDL (and config) parser --fseide
-
-#define _CRT_NONSTDC_NO_DEPRECATE // make VS accept POSIX functions without _
-#define _CRT_SECURE_NO_WARNINGS // "secure" CRT not available on all platforms --add this at the top of all CPP files that give "function or variable may be unsafe" warnings
-
-#include <string>
-
-using namespace std;
-
-// TODO: move to actual text files to be included
-
-wstring standardFunctions =
-    L"Print(value, format='') = new PrintAction [ what = value /*; how = format*/ ] \n"
-    L"Debug(value, say = '', enabled = true) = new Debug [ /*macro arg values*/ ] \n"
-    L"Format(value, format) = new StringFunction [ what = 'Format' ; arg = value ; how = format ] \n"
-    L"Replace(s, from, to) = new StringFunction [ what = 'Replace' ; arg = s ; replacewhat = from ; withwhat = to ] \n"
-    L"Substr(s, begin, num) = new StringFunction [ what = 'Substr' ; arg = s ; pos = begin ; chars = num ] \n"
-    L"Chr(c) = new StringFunction [ what = 'Chr' ; arg = c ] \n"
-    L"Floor(x) = new NumericFunction [ what = 'Floor' ; arg = x ] \n"
-    L"Length(x) = new NumericFunction [ what = 'Length' ; arg = x ] \n"
-    L"Ceil(x) = -Floor(-x) \n"
-    L"Round(x) = Floor(x+0.5) \n"
-    L"Sign(x) = if x > 0 then 1 else if x < 0 then -1 else 0 \n"
-    L"Min(a,b) = if a < b then a else b \n"
-    L"Max(a,b) = if a > b then a else b \n"
-    L"Fac(n) = if n > 1 then Fac(n-1) * n else 1 \n";
-
-wstring commonMacros =
-    L"BFF(in, rows, cols) = [ B = Parameter(rows, 1, init = 'fixedValue', value = 0) ; W = Parameter(rows, cols) ; z = W*in+B ] \n"
-    L"SBFF(in, rows, cols) = [ Eh = Sigmoid(BFF(in, rows, cols).z) ] \n "
-    L"MeanVarNorm(feat) = PerDimMeanVarNormalization(feat, Mean(feat), InvStdDev(feat)) \n"
-    L"LogPrior(labels) = Log(Mean(labels)) \n";
-
-wstring computationNodes = // TODO: use actual TypeName() here? would first need to make it a wide string; we should also extract those two methods into the base macro
-    L"LearnableParameter(rows, cols, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (rows : cols) ] /*plus the function args*/ ]\n"
-    L"Parameter = LearnableParameter // deprecated \n"
-    L"ParameterTensor(dims, learningRateMultiplier = 1.0, init = 'uniform'/*|fixedValue|gaussian|fromFile*/, initValueScale = 1, value = 0, initFromFilePath = '', initOnCPUOnly=true, randomSeed=-1, tag='') = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
-    // TODO: ImageParameter?
-    // ^^ already works; vv untested
-    L"Input(dims, tag='feature') = new ComputationNode [ operation = 'InputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n" // note: naming a little inconsistent // TODO: re-test after flag change
-    L"SparseInput(dims, tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; shape = new TensorShape [ /*dims*/ ] ; isImage = false /*plus the function args*/ ]\n"
-    L"ImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'InputValue' ; isImage = true /*plus the function args*/ ]\n"
-    L"SparseImageInput(imageWidth, imageHeight, imageChannels, imageLayout='CHW', tag='feature') = new ComputationNode [ operation = 'SparseInputValue' ; isImage = true /*plus the function args*/ ]\n"
-    L"Constant(val, rows = 1, cols = 1, tag='') = Parameter(rows, cols, learningRateMultiplier = 0, init = 'fixedValue', value = val) \n"
-    L"PastValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'PastValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
-    L"FutureValue(dims, input, timeStep = 1, defaultHiddenActivation = 0.1, tag='') = new ComputationNode [ operation = 'FutureValue' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
-    // TODO: ^^ DelayedValues no longer need to know their dimension. That is inferred in Validation.
-    L"Shift(input, fromOffset, boundaryValue, boundaryMode=-1/*context*/, dim=-1, tag='') = new ComputationNode [ operation = 'Shift' ; inputs = (input : boundaryValue) /*plus the function args*/ ]\n"
-    L"RowSlice(startIndex, numRows, input, tag='') = new ComputationNode [ operation = 'RowSlice' ; inputs = input /*plus the function args*/ ]\n"
-    L"RowRepeat(input, numRepeats, tag='') = new ComputationNode [ operation = 'RowRepeat' ; inputs = input /*plus the function args*/ ]\n"
-    L"RowStack(inputs, tag='') = new ComputationNode [ operation = 'RowStack' /*plus the function args*/ ]\n"
-    L"Reshape(input, numRows, imageWidth = 0, imageHeight = 0, imageChannels = 0, tag='') = new ComputationNode [ operation = 'LegacyReshape' ; inputs = input /*plus the function args*/ ]\n"
-    L"NewReshape(input, dims, beginDim=0, endDim=0, tag='') = new ComputationNode [ operation = 'Reshape' ; inputs = input ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]\n"
-    L"ReshapeDimension(x, dim, tensorShape) = NewReshape(x, tensorShape, beginDim=dim, endDim=dim + 1) \n"
-    L"FlattenDimensions(x, dim, num) = NewReshape(x, 0, beginDim=dim, endDim=dim + num) \n"
-    L"SplitDimension(x, dim, N) = ReshapeDimension(x, dim, 0:N) \n"
-    L"TransposeDimensions(input, dim1, dim2, tag='') = new ComputationNode [ operation = 'TransposeDimensions' ; inputs = input /*plus the function args*/ ]\n"
-    L"Transpose(x) = TransposeDimensions(x, 1, 2)\n"
-    L"Times(A, B, outputRank=1, tag='') = new ComputationNode [ operation = 'Times' ; inputs = ( A : B ) /*plus the function args*/ ]\n"
-    // TODO: Logistic should be generated with with BinaryStandardNode macro below.
-    L"Logistic(label, probability, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability) /*plus the function args*/ ]\n"
-    L"WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNode [ operation = 'Logistic' ; inputs = (label : probability : instanceWeight) /*plus the function args*/ ]\n"
-    L"ReconcileMBLayout(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileMBLayout' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]\n"
-    L"Convolution(weightNode, inputValueNode, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample, zeroPadding = false, maxTempMemSizeInSamples = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode) /*plus the function args*/ ]\n"
-    L"MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]\n"
-    L"AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = input /*plus the function args*/ ]\n"
-    // TODO: define DelayedValue, with negative delay for future; cannot do this yet, need to be able to say something like delay = -(^.delay)
-    // aliases
-    L"ColumnwiseCrossProduct = KhatriRaoProduct // deprecated \n" // TODO: should it be deprecated? It is described as easier to understand in the CNTKBook.
-    L"ClassificationError = ErrorPrediction \n"
-    L"Delay = PastValue \n" // TODO: should it allow negative offsets and an if test here?
-    L"BatchNormalization(input, scale, bias, runMean, runInvStdDev, eval, spatial, normalizationTimeConstant = 0, epsilon = 0.00001, useCntkEngine = true, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'BatchNormalization' ; inputs = (input : scale : bias : runMean : runInvStdDev) /*plus the function args*/ ]\n"
-    // standard nodes. We use macros to define these strings.
-#define UnaryStandardNode(Op, a) L## #Op L"(" L## #a L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = " L## #a L" /*plus the function args*/ ]\n"
-#define BinaryStandardNode(Op, a, b) L## #Op L"(" L## #a L", " L## #b L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L") /*plus the function args*/ ]\n"
-#define TernaryStandardNode(Op, a, b, c) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L") /*plus the function args*/ ]\n"
-#define QuaternaryStandardNode(Op, a, b, c, d) L## #Op L"(" L## #a L", " L## #b L", " L## #c L", " L## #d L", tag='') = new ComputationNode [ operation = '" L## #Op L"' ; inputs = (" L## #a L" : " L## #b L" : " L## #c L" : " L## #d L") /*plus the function args*/ ]\n"
-#ifdef COMING_SOON
-    TernaryStandardNode(CRF, labelVectorSequence, positionDependenScoreVectorSequence, transitionScores) // TODO: better names
-#endif
-    UnaryStandardNode(Abs, x)
-    QuaternaryStandardNode(ClassBasedCrossEntropyWithSoftmax, labelClassDescriptorVectorSequence, mainInputInfo, mainWeight, classLogProbsBeforeSoftmax)
-    // BUGBUG: the commented-out ones are not mentioned in the CNTK book, nor are their parameters documented in the source code
-    BinaryStandardNode(ColumnElementTimes, aVectorSequence, anotherVectorSequence)
-    BinaryStandardNode(CosDistance, aVectorSequence, anotherVectorSequence)
-    QuaternaryStandardNode(CosDistanceWithNegativeSamples, aVectorSequence, anotherVectorSequence, numShifts, numNegSamples)
-    //BinaryStandardNode(CosDistanceWithNegativeSamplesNode)
-    UnaryStandardNode(Cosine, x)
-    BinaryStandardNode(CrossEntropy, refProbVectorSequence, outProbVectorSequence)
-    BinaryStandardNode(CrossEntropyWithSoftmax, labelVectorSequence, outProbVectorSequence)
-    BinaryStandardNode(DiagTimes, diagonalMatrixAsColumnVector, matrix)
-    UnaryStandardNode(Dropout, activationVectorSequence)
-    //BinaryStandardNode(DummyCriterionNode)
-    BinaryStandardNode(ElementTimes, aMatrix, anotherMatrix)
-    BinaryStandardNode(ErrorPrediction, labelVectorSequence, outVectorSequence) // CNTKBook: ClassificationError?
-    UnaryStandardNode(Exp, x)
-    QuaternaryStandardNode(GMMLogLikelihood, unnormalizedPriorVector, meansAsRows, logStdDevAsRows, dataVectorSequence)
-    UnaryStandardNode(InvStdDev, dataVectorSequence)
-    BinaryStandardNode(KhatriRaoProduct, leftMatrix, rightMatrix)
-    //BinaryStandardNode(LSTMNode)
-    UnaryStandardNode(Log, x)
-    UnaryStandardNode(LogSoftmax, z)
-    //BinaryStandardNode(LookupTableNode)
-    UnaryStandardNode(MatrixL1Reg, matrix)
-    UnaryStandardNode(MatrixL2Reg, matrix)
-    // BUGBUG: CNTKBook also mentions L1Norm and L2Norm
-    UnaryStandardNode(Mean, dataVectorSequence)
-    BinaryStandardNode(Minus, leftMatrix, rightMatrix)
-    UnaryStandardNode(Negate, input)
-    TernaryStandardNode(PerDimMeanVarDeNormalization, dataVectorSequence, meanVector, invStdDevVector) // TODO: correct?
-    TernaryStandardNode(PerDimMeanVarNormalization, dataVectorSequence, meanVector, invStdDevVector)
-    BinaryStandardNode(Plus, leftMatrix, rightMatrix)
-    UnaryStandardNode(RectifiedLinear, z)
-    //BinaryStandardNode(RowElementTimesNode)
-    BinaryStandardNode(Scale, scalarScalingFactor, matrix)
-#ifdef COMING_SOON
-    //BinaryStandardNode(SequenceDecoderNode)
-#endif
-    UnaryStandardNode(Sigmoid, z)
-    UnaryStandardNode(Softmax, z)
-    UnaryStandardNode(Hardmax, z)
-    BinaryStandardNode(SquareError, aMatrix, anotherMatrix)
-    UnaryStandardNode(SumColumnElements, z)
-    UnaryStandardNode(SumElements, matrix)
-    UnaryStandardNode(Tanh, z)
-    UnaryStandardNode(TimeReverse, vectorSequence)
-    BinaryStandardNode(TransposeTimes, leftMatrix, rightMatrix)
-    // those nodes are deprecated, we won't implement them in BS:
-    //BinaryStandardNode(NoiseContrastiveEstimationNode)
-    //BinaryStandardNode(ParallelNode)
-    //BinaryStandardNode(StrideTimesNode)
-    ;
-#endif

@@ -205,7 +205,6 @@
     <ClCompile Include="BrainScript\BrainScriptEvaluator.cpp" />
     <ClCompile Include="BrainScript\BrainScriptParser.cpp" />
     <ClCompile Include="BrainScript\BrainScriptTest.cpp" />
-    <ClCompile Include="BrainScript\ExperimentalNetworkBuilder.cpp" />
     <ClCompile Include="CNTK.cpp" />
     <ClCompile Include="ModelEditLanguage.cpp" />
     <ClCompile Include="stdafx.cpp" />

@@ -44,9 +44,6 @@
     <ClCompile Include="BrainScript\BrainScriptTest.cpp">
       <Filter>BrainScript</Filter>
     </ClCompile>
-    <ClCompile Include="BrainScript\ExperimentalNetworkBuilder.cpp">
-      <Filter>BrainScript</Filter>
-    </ClCompile>
     <ClCompile Include="..\Common\ExceptionWithCallStack.cpp">
       <Filter>Common</Filter>
     </ClCompile>

@@ -31,6 +31,34 @@ static const char* GetReaderName(const string& precision)
         InvalidArgument("DataReader: The 'precision' parameter must be 'float' or 'double'.");
 }
 
+void DataReaderBase::SetMinibatchLayout(StreamMinibatchInputs& minibatch)
+{
+    assert(minibatch.begin() != minibatch.end());
+
+    auto& pMBLayout = minibatch.begin()->second.pMBLayout;
+    // This is only allowed for old readers, which support a single layout for all inputs.
+    for (const auto& iter : minibatch)
+    {
+        assert(iter.second.pMBLayout == pMBLayout);
+        // TODO: This should be a runtime check, not an assert() that only runs in Debug.
+        UNUSED(iter);
+    }
+
+    CopyMBLayoutTo(pMBLayout);
+}
+
+bool DataReaderBase::GetMinibatch(StreamMinibatchInputs& minibatch)
+{
+    if (TryGetMinibatch(minibatch))
+    {
+        SetMinibatchLayout(minibatch);
+        return true;
+    }
+
+    return false;
+}
+
+
 template <class ConfigRecordType>
 void DataReader::InitFromConfig(const ConfigRecordType& /*config*/)
 {

@@ -59,28 +59,29 @@ public:
     /*const*/ TensorShape sampleLayout;
 
     // constructor
-    Input(MatrixBasePtr matrix, MBLayoutPtr pMBLayout, TensorShape sampleLayout) : matrix(matrix), pMBLayout(pMBLayout), sampleLayout(sampleLayout)
+    Input(MatrixBasePtr matrix, MBLayoutPtr pMBLayout, TensorShape sampleLayout) :
+        matrix(matrix), pMBLayout(pMBLayout), sampleLayout(sampleLayout)
     {
         assert(matrix);
     }
-    Input(){} // some STL classes need this for general happiness
+    Input() {} // some STL classes need this for general happiness
 
     // helper for typecasting the matrix pointer
-    template<class ElemType>
+    template<class ElemType>
     Matrix<ElemType>& GetMatrix(const wchar_t* name/*for debugging only*/ = L"(unknown)") const
-    {
+    {
         assert(matrix);
         auto* matrixp = dynamic_cast<Matrix<ElemType>*>(matrix.get());
-        if (!matrixp)
-        {
-            // print a rather rich error to track down a regression failure
+        if (!matrixp)
+        {
+            // print a rather rich error to track down a regression failure
            auto isFloat = !!dynamic_cast<Matrix<float>*> (matrix.get());
            auto isDouble = !!dynamic_cast<Matrix<double>*>(matrix.get());
            LogicError("GetMatrix<%s>: Attempted to access input stream '%ls' with wrong precision, got %s {%d,%d} instead of %s.",
                       typeid(ElemType).name(), name, typeid(matrix.get()).name(), (int)isFloat, (int)isDouble, typeid(Matrix<ElemType>*).name());
-        }
-        return *matrixp;
+        }
+        return *matrixp;
     }
 };
 
 private:

@@ -246,6 +247,21 @@ typedef std::shared_ptr<IDataReader> IDataReaderPtr;
 extern "C" DATAREADER_API void GetReaderF(IDataReader** preader);
 extern "C" DATAREADER_API void GetReaderD(IDataReader** preader);
 
+// The sole purpose of this base class is to provide backwards compatibility for (old)
+// readers that do not support multiple mb layouts.
+class DataReaderBase : public IDataReader
+{
+protected:
+    // Verifies that all inputs share the same layout (have the same layout pointer)
+    // and copies the provided layout into the minibatch layout.
+    // This method is needed for backwards-compatibility and only meant to be used by old readers!
+    void SetMinibatchLayout(StreamMinibatchInputs& minibatch);
+
+    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices) = 0;
+public:
+    virtual bool GetMinibatch(StreamMinibatchInputs& matrices) override;
+};
+
 // Data Reader class
 // interface for clients of the Data Reader
 // mirrors the IDataReader interface, except the Init method is private (use the constructor)

@@ -292,7 +308,6 @@ class DataReader : public IDataReader, protected Plugin, public ScriptableObject
     // NOTE: this destroys the object, and it can't be used past this point.
     // The reason why this is not just a destructor is that it goes across a DLL boundary.
     virtual void Destroy() override;
 
-public:
     // DataReader Constructor
     // config - [in] configuration parameters for the datareader

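The DataReaderBase declared above (and defined in the DataReader.cpp hunk earlier) is a small template-method pattern: a legacy reader implements TryGetMinibatch() and maintains one layout, and the public GetMinibatch() then stamps that layout onto every input stream. A self-contained sketch of that contract with simplified stand-in types (not CNTK's real signatures):

    #include <map>
    #include <memory>
    #include <string>

    struct MBLayout { size_t numTimeSteps = 0; };
    using MBLayoutPtr = std::shared_ptr<MBLayout>;
    struct Input { MBLayoutPtr pMBLayout = std::make_shared<MBLayout>(); };
    using StreamMinibatchInputs = std::map<std::wstring, Input>;

    class DataReaderBase
    {
    protected:
        MBLayoutPtr m_layout = std::make_shared<MBLayout>(); // old readers keep ONE layout
        virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices) = 0; // fill data only
        void SetMinibatchLayout(StreamMinibatchInputs& minibatch)
        {
            for (auto& kv : minibatch)        // every input gets a copy of the single layout
                *kv.second.pMBLayout = *m_layout;
        }
    public:
        virtual ~DataReaderBase() = default;
        bool GetMinibatch(StreamMinibatchInputs& matrices)
        {
            if (!TryGetMinibatch(matrices))   // end of data
                return false;
            SetMinibatchLayout(matrices);     // then propagate the layout
            return true;
        }
    };
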
@@ -100,29 +100,29 @@ struct MBLayout
     {
         return seqId == other.seqId && s == other.s && tBegin == other.tBegin && tEnd == other.tEnd;
     }
-    size_t GetNumTimeSteps() const
-    {
-        return (size_t)(tEnd - tBegin);
-    }
+    size_t GetNumTimeSteps() const { return (size_t)(tEnd - tBegin); }
 };
 
 // -------------------------------------------------------------------
 // construction
 // -------------------------------------------------------------------
 
-MBLayout(size_t numParallelSequences, size_t numTimeSteps)
+MBLayout(size_t numParallelSequences, size_t numTimeSteps, const std::wstring &name)
     : m_distanceToStart(CPUDEVICE), m_distanceToEnd(CPUDEVICE), m_columnsValidityMask(CPUDEVICE)
 {
     Init(numParallelSequences, numTimeSteps);
+    SetUniqueAxisName(name != L"" ? name : L"DynamicAxis");
 }
 MBLayout()
-    : MBLayout(1, 0)
+    : MBLayout(1, 0, L"")
 {
 }
 
 // copy the content of another MBLayoutPtr over
 // Use this instead of actual assignment to make it super-obvious that this is not copying the pointer but actual content. The pointer is kept fixed.
-void CopyFrom(const MBLayoutPtr& other)
+// Use "keepName" if the "identity" of the target is to be preserved, e.g.
+// while copying from reader space to network space.
+void CopyFrom(const MBLayoutPtr& other, bool keepName=false)
 {
     m_numTimeSteps = other->m_numTimeSteps;
     m_numParallelSequences = other->m_numParallelSequences;

@@ -141,7 +141,8 @@ struct MBLayout
     m_columnsValidityMask.SetValue(other->m_columnsValidityMask);
     m_writable = other->m_writable;
 
-    m_axisName = other->m_axisName;
+    if (!keepName)
+        m_axisName = other->m_axisName;
 }
 
 // Destructive copy that steals ownership if the content, like std::move()

@@ -275,7 +276,7 @@ public:
     }
 
     // return all sequences stored in this minibatch
-    const vector<SequenceInfo> &GetAllSequences() const
+    const vector<SequenceInfo>& GetAllSequences() const
     {
         return m_sequences;
     }

@@ -287,7 +288,7 @@ public:
     const Matrix<char>& GetColumnsValidityMask(DEVICEID_TYPE deviceId) const;
 
     // compare whether two layouts are the same
-    bool operator==(const MBLayout &other) const
+    bool operator==(const MBLayout& other) const
     {
         if (this == &other)
             return true;

@@ -441,8 +442,8 @@ public:
     bool HasGaps(const FrameRange &fr) const;
 
     // test boundary flags for a specific condition
-    bool IsBeyondStartOrEnd(const FrameRange &fr) const;
-    bool IsGap(const FrameRange &fr) const;
+    bool IsBeyondStartOrEnd(const FrameRange& fr) const;
+    bool IsGap(const FrameRange& fr) const;
 
     // test whether at least one sequence crosses the bounds of this minibatch
     bool HasSequenceBeyondBegin() const

@@ -555,7 +556,7 @@ private:
     // Meant to guard in lazy creation of m_columnsValidityMask.
     mutable bool m_writable;
 
-    // the axis
+    // The axis this MBLayout represents.
     // For now only a string meant for debugging.
     std::wstring m_axisName;
 

@@ -751,6 +752,7 @@ inline bool MBLayout::HasGaps() const
 {
     return m_numGapFrames > 0; /*HasGaps(FrameRange());*/
 }
+
 inline bool MBLayout::HasGaps(const FrameRange &fr) const
 {
     CheckIsValid();

@@ -828,7 +830,7 @@ inline size_t MBLayout::GetActualNumSamples() const { return m_numFramesDeclared
 // only called from MaskMissingColumnsTo()
 // TODO: Can probably be faster by using the sequence array directly.
 // TODO: Or should we just blast m_distanceToStart to GPU, and maks based on that? It is small compared to features.
-inline const Matrix<char> &MBLayout::GetColumnsValidityMask(DEVICEID_TYPE deviceId) const
+inline const Matrix<char>& MBLayout::GetColumnsValidityMask(DEVICEID_TYPE deviceId) const
 {
     CheckIsValid();
     // lazily compute the validity mask

@@ -947,7 +949,7 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCol
     // MBLayout of data and of FrameRange must be identical pointers,
     // or in case of broadcasting, respective parent pointers.
     // MBLayouts that are identical in content but not object identity (pointer) are not admissible.
-    // For those cases, use a ReconcileMBLayout node.
+    // For those cases, use a ReconcileDynamicAxis node.
     if (fr.m_pMBLayout != pMBLayout)
     {
         // if broadcast allowed then it is allowed to broadcast from an outer-loop value

@@ -955,9 +957,9 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCol
     if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
         return std::pair<size_t, size_t>(0, numCols);
     if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
-        LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation?");
+        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
     else
-        LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix.");
+        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix.");
     }
     // if FrameRange refers to whole minibatch (map mode)
     // or if we don't even have a layout

@@ -1040,7 +1042,7 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayou
     // MBLayout of data and of FrameRange must be identical pointers,
     // or in case of broadcasting, respective parent pointers.
     // MBLayouts that are identical in content but not object identity (pointer) are not admissible.
-    // For those cases, use a ReconcileMBLayout node.
+    // For those cases, use a ReconcileDynamicAxis node.
     if (isTimeIteration && fr.m_pMBLayout != pMBLayout)
     {
         // if broadcast allowed then it is allowed to broadcast from an outer-loop value

@@ -1048,10 +1050,10 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayou
     if (fr.m_pMBLayout /*get data for a loop*/ && !pMBLayout /*'data' is not samples*/ && fr.m_broadcastAllowed /*we're OK with that*/)
         ; // the time dimension is broadcasting--leave it as is
     else if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
-        LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix. They are compatible though--are you missing a ReconcileMBLayout operation? %s vs. %s",
+        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
                    static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
     else
-        LogicError("DataFor: FrameRange's MBLayout inconsistent with matrix: %s vs. %s",
+        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix: %s vs. %s",
                    static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
     }
     // if FrameRange refers to whole minibatch (map mode)

@@ -1123,8 +1125,10 @@ static inline void MaskMissingColumnsTo(Matrix<ElemType>& matrixToMask, const MB
     TensorView<ElemType>(matrixSliceToMask).DoMaskNegativeOf(0, TensorView<ElemType>(matrixSliceToMask), TensorView<ElemType>(maskSlice), 1); val;
 #else
     const auto& maskMatrix = pMBLayout->GetColumnsValidityMask(matrixToMask.GetDeviceId());
+
+    maskMatrix.TransferToDeviceIfNotThere(matrixToMask.GetDeviceId(), /*ismoved=*/ false, /*emptyTransfer=*/ false, /*updatePreferredDevice=*/ false);
     auto maskSlice = DataWithMBLayoutFor(maskMatrix, fr, pMBLayout);
 
     auto matrixSliceToMask = DataWithMBLayoutFor(matrixToMask, fr, pMBLayout);
     matrixSliceToMask.MaskColumnsValue(maskSlice, val);
 #endif

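In the hunks above, MBLayout now carries an axis name set at construction, and CopyFrom() grew a keepName flag: copying a reader-side layout into a network-side one transfers the geometry, but with keepName=true the target keeps its own name (its identity). A self-contained sketch with a stand-in type, not the real class:

    #include <cassert>
    #include <string>

    struct MBLayout
    {
        size_t numParallelSequences = 1, numTimeSteps = 0;
        std::wstring axisName = L"DynamicAxis";
        void CopyFrom(const MBLayout& other, bool keepName = false)
        {
            numParallelSequences = other.numParallelSequences;
            numTimeSteps = other.numTimeSteps;
            if (!keepName)
                axisName = other.axisName; // same rule as the real CopyFrom() above
        }
    };

    int main()
    {
        MBLayout network; network.axisName = L"x";          // network-side axis
        MBLayout reader;  reader.numParallelSequences = 32; // filled by the reader
        reader.numTimeSteps = 20;
        network.CopyFrom(reader, /*keepName=*/true);
        assert(network.numTimeSteps == 20 && network.axisName == L"x");
    }
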
@@ -79,7 +79,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
 // - special case: swapping between sample and MBLayout, e.g. turn a sample dimension to a time dimension
 // - Validate() stage will automatically infer tensor dimensions from inputs, and also infer downwards into LearnableParameters where requested
 //
-// Interfacing to and inplementation in Matrix lib:
+// Interfacing to and implementation in Matrix lib:
 // - a Tensor is realized as a type TensorView = { Matrix&, TensorShape& } (i.e. tensors don't own their memory)
 // - Matrix lib will contain overloads for relevant operations that take Tensor& instead of Matrix&.
 // - elementwise ops will go through a single bottleneck function that deals with matching dimensions (extend, broadcast) and flattening

@@ -50,13 +50,14 @@ public:
 
     ComputationNetwork() :
         m_randomSeedOffset(0),
-        m_isCompiled(false),
-        m_areMatricesAllocated(false),
-        m_pMBLayoutOfNetwork(make_shared<MBLayout>()),
+        m_isCompiled(false),
+        m_areMatricesAllocated(false),
+        m_pMBLayoutOfNetwork(make_shared<MBLayout>(1, 0, L"*")),
         m_environment(make_shared<ComputationEnvironment>())
     {
-        m_pMBLayoutOfNetwork->SetAxisName(L"T");
+        //m_pMBLayoutOfNetwork->SetAxisName(L"T");
     }
 
     ComputationNetwork(DEVICEID_TYPE deviceId)
         : ComputationNetwork()
     {

@@ -289,6 +290,8 @@ public:
     // This returns max number of columns over the feature nodes.
     // Note that if we have multiple slices, MB size != #frames.
     // BUGBUG: This will break once we have inconsistent layouts.
+    // BUGBUG: The number computed here is completely off (it the layout has gaps
+    // they will also be counted towards the actualMBSize)
     size_t DetermineActualMBSizeFromFeatures() const
     {
         size_t actualMBSize = 0;

@@ -48,6 +48,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
     else if (nodeType == OperationNameOf(DiagTimesNode)) return New<DiagTimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(DropoutNode)) return New<DropoutNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(DummyCriterionNode)) return New<DummyCriterionNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == OperationNameOf(DynamicAxisNode)) return New<DynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ElementTimesNode)) return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(EnvironmentInputNode)) return New<EnvironmentInputNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ErrorPredictionNode)) return New<ErrorPredictionNode<ElemType>>(forward<_Types>(_Args)...);

@@ -75,7 +76,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
     else if (nodeType == OperationNameOf(PerDimMeanVarDeNormalizationNode)) return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(PassNode)) return New<PassNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(PlusNode)) return New<PlusNode<ElemType>>(forward<_Types>(_Args)...);
-    else if (nodeType == OperationNameOf(ReconcileMBLayoutNode)) return New<ReconcileMBLayoutNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == OperationNameOf(ReconcileDynamicAxisNode)) return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ReciprocalNode)) return New<ReciprocalNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(RectifiedLinearNode)) return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == OperationNameOf(ReshapeNode)) return New<ReshapeNode<ElemType>>(forward<_Types>(_Args)...);

@@ -110,6 +111,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
     // TODO: DiagTimes is also an alias of ElementTimes; current separate implementation is unnecessary.
     else if (nodeType == L"PerDimMeanVarNormalizationNode") return New<PerDimMeanVarNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"PerDimMeanVarDeNormalizationNode") return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
+    else if (nodeType == L"ReconcileMBLayout") return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"RowElementTimes") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"RowSlice") return New<SliceNode<ElemType>>(forward<_Types>(_Args)...);
     else if (nodeType == L"Scale") return New<ElementTimesNode<ElemType>>(forward<_Types>(_Args)...);

@@ -193,28 +195,29 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Creat
     return net.AddNodeToNetWithElemType(New<LearnableParameter<ElemType>>(net.GetDeviceId(), paramName, tensorShape));
 }
 
+// TODO: change these to take an actual object instead of a name for dynamicAxis
 template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const size_t rows)
+shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName)
 {
-    return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
+    return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, rows, dynamicAxisName));
 }
 
 template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const size_t rows)
+shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName)
 {
-    return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows));
+    return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, rows, dynamicAxisName));
 }
 
 template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout)
+shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName)
 {
-    return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, sampleLayout));
+    return net.AddNodeToNetWithElemType(New<InputValue<ElemType>>(net.GetDeviceId(), inputName, sampleLayout, dynamicAxisName));
 }
 
 template <class ElemType>
-shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const TensorShape& imageLayout)
+shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateSparseInputNode(const std::wstring& inputName, const TensorShape& imageLayout, const wstring& dynamicAxisName)
 {
-    return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout));
+    return net.AddNodeToNetWithElemType(New<SparseInputValue<ElemType>>(net.GetDeviceId(), inputName, imageLayout, dynamicAxisName));
 }
 
 template <class ElemType>

@@ -461,6 +464,12 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Pass(
     return net.AddNodeToNetAndAttachInputs(New<PassNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
 }
 
+template <class ElemType>
+shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName)
+{
+    return net.AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
+}
+
 template <class ElemType>
 shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::InvStdDev(const ComputationNodePtr a, const std::wstring nodeName)
 {

|
|||
ComputationNodePtr CreateLearnableParameter(const std::wstring& paramName, const TensorShape& tensorShape);
|
||||
// sparse matrix size is optionally specified
|
||||
// ComputationNodePtr CreateSparseLearnableParameter(const std::wstring & paramName, const size_t rows, const size_t cols, const size_t size = 0);
|
||||
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const size_t rows);
|
||||
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows);
|
||||
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
|
||||
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout);
|
||||
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName = L"");
|
||||
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const size_t rows, const wstring& dynamicAxisName = L"");
|
||||
ComputationNodePtr CreateInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
|
||||
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
|
||||
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
|
||||
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
|
||||
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
|
||||
|
@ -108,6 +108,7 @@ public:
|
|||
ComputationNodePtr Dropout(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr DummyCriterion(const ComputationNodePtr objectives, const ComputationNodePtr derivatives, const ComputationNodePtr prediction, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr ElementTimes(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr DynamicAxis(const ComputationNodePtr a, const std::wstring& nodeName = L"");
|
||||
ComputationNodePtr ErrorPrediction(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr Exp(const ComputationNodePtr a, const std::wstring nodeName = L"");
|
||||
ComputationNodePtr FutureValue(const ComputationNodePtr a, const float initHiddenActivity, const size_t row_size, size_t timeStep, const std::wstring nodeName = L"");
|
||||
|
|
|
@ -517,9 +517,8 @@ void ComputationNetwork::DetermineSetOfAllRoots()
|
|||
}
|
||||
|
||||
// initial setup of MBLayout pointers
|
||||
// - link all input nodes to one or more MBLayouts --TODO: Currently only one
|
||||
// - link all input nodes to one or more MBLayouts
|
||||
// - reset all others to nullptr, in expectation of a ValidateNetwork() pass
|
||||
// BUGBUG (Issue #95): Change this to use different MBLayouts for different inputs if so configured.
|
||||
void ComputationNetwork::ResetMBLayouts()
|
||||
{
|
||||
// reset to a well-defined MBLayout (any meaningful layout should do here)
|
||||
|
@@ -530,10 +529,42 @@ void ComputationNetwork::ResetMBLayouts()
    for (const auto& node : GetAllNodesForRoot(nullptr))
        node->LinkToMBLayout(nullptr);

    // then fix up inputs (all others get propagated upwards through Validate())
    // BUGBUG (Issue #95): Once we support mismatching layouts, this will be more involved. For now, everything shares the one layout that the Network knows about.
    // DynamicAxis nodes are (apart from the soon-to-be-deprecated network-wide MBLayout) the main holders of MBLayouts. Initialize them.
    // The only other instances are nodes that change the MBLayout, like WhereNode.
    for (auto node : GetNodesWithType(L"DynamicAxis"))
        node->LinkToMBLayout(make_shared<MBLayout>(1, 0, node->GetName()));

    // This is now initialized inside of the Input nodes, with the proper connections.
    for (auto node : InputNodes(nullptr))
        node->LinkToMBLayout(m_pMBLayoutOfNetwork);
    {
        // TODO: use if (!Is<ITakesDynamicAxis>(node))...
        auto n = dynamic_pointer_cast<ITakesDynamicAxis>(node);
        if (!n)
            LogicError("Expected %ls to implement ITakesDynamicAxis, but it doesn't.", node->NodeDescription().c_str());
        std::wstring axisName = n->GetRequestedDynamicAxis();

        if (axisName == L"")
        {
            // Legacy behavior: One shared MBLayout
            // TODO Remove m_pMBLayoutOfNetwork altogether. See issue 358.
            node->LinkToMBLayout(m_pMBLayoutOfNetwork);
        }
        else
        {
            auto axisNode = GetNodeFromName(axisName);

            if (!axisNode)
                RuntimeError("%ls: Can't find node '%ls' for retrieving dynamic axis.", node->NodeDescription().c_str(), axisName.c_str());

            // For now we require the node to be a DynamicAxisNode, though we could derive the same from other nodes. This would involve
            // more dependencies on the order in which things are evaluated, though.
            if (axisNode->OperationName() != L"DynamicAxis")
                RuntimeError("%ls: dynamicAxis argument must be of type DynamicAxis(), but got %ls.", node->NodeDescription().c_str(), axisNode->NodeDescription().c_str());
            if (!axisNode->HasMBLayout())
                LogicError("%ls: Expected %ls to have MBLayout, but it doesn't.", node->NodeDescription().c_str(), axisNode->NodeDescription().c_str());
            node->LinkToMBLayout(axisNode->GetMBLayout());
        }
    }
}

// -----------------------------------------------------------------------

@@ -661,6 +692,11 @@ size_t ComputationNetwork::ValidateNodes(list<ComputationNodeBasePtr> nodes, boo
{
    hasVisitedChild |= child->m_visited; // if not a single visited child then no point in validating
    allChildrenVisited &= child->m_visited;

    // Make sure we don't use DynamicAxis in places it was not designed for.
    // This is a stop-gap. We need a more coherent concept for passing of shapes.
    if (child->OperationName() == L"DynamicAxis")
        RuntimeError("%ls: Cannot be used as input to another node. It can only be used on the 'dynamicAxis' property of an Input node.", child->NodeDescription().c_str());
}

// if there is not at least one visited child

@@ -100,7 +100,7 @@ void ComputationNodeBase::InferMBLayoutFromInputsForStandardCase(bool isFinalVal
else if (!pMBLayout) // first non-NULL layout: just copy it
    pMBLayout = child->m_pMBLayout;
else if (pMBLayout != child->m_pMBLayout && isFinalValidationPass) // got a layout--compare whether it is the same
    RuntimeError("%ls: InferMBLayoutFromInputsForStandardCase: Expected minibatch layouts to be the same between all children. Child '%ls' (%ls) uses a different layout than previously checked children and might get out of sync during runtime. If this is by design, use ReconcileMBLayout() to forward layouts between nodes.",
    RuntimeError("%ls: InferMBLayoutFromInputsForStandardCase: Expected minibatch layouts to be the same between all children. Child '%ls' (%ls) uses a different layout than previously checked children and might get out of sync during runtime. If this is by design, use ReconcileDynamicAxis() to forward layouts between nodes.",
                 NodeDescription().c_str(), child->NodeName().c_str(), child->OperationName().c_str());
}
// all are consistent: install it

@@ -130,7 +130,7 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
if (isFinalValidationPass &&
    Input(0)->GetMBLayout() != Input(1)->GetMBLayout() && Input(0)->HasMBLayout() && Input(1)->HasMBLayout())
{
    LogicError("%ls: Minibatch layouts are not the same between arguments and might get out of sync during runtime. If this is by design, use ReconcileMBLayout() to forward layouts between nodes.", NodeDescription().c_str());
    LogicError("%ls: Minibatch layouts are not the same between arguments and might get out of sync during runtime. If this is by design, use ReconcileDynamicAxis() to forward layouts between nodes.", NodeDescription().c_str());
}

// result has tensor shape with dimensions being the max over both

@@ -176,6 +176,7 @@ void ComputationNodeBase::ValidateBinaryReduce(bool isFinalValidationPass)
ComputationNodeBase::Validate(isFinalValidationPass);
m_pMBLayout = nullptr; // this node does not hold mini-batch data
ValidateInferBinaryInputDims();

if (isFinalValidationPass)
{
    if (!(Input(0)->GetSampleLayout().IsElementwiseCompatibleWith(Input(1)->GetSampleLayout())))

@@ -338,18 +339,25 @@ TensorShape ComputationNodeBase::GetOneSampleTensorSliceFor(size_t rank, const F
            prototype += "NULL";
            continue;
        }

        prototype += msra::strfun::strprintf("[%s%ls]", string(child->m_sampleLayout).c_str(), child->GetMBLayoutAxisString().c_str());
        prototype += child->ShapeDescription().c_str();
    }
    prototype += extraArgs;
    //prototype += ")";
}

prototype += msra::strfun::strprintf(" -> [%s%ls]", string(GetSampleLayout()).c_str(), GetMBLayoutAxisString().c_str());
prototype += msra::strfun::strprintf(" -> %s", ShapeDescription().c_str());

return prototype;
}

const std::string ComputationNodeBase::ShapeDescription() const
{
    return msra::strfun::strprintf("[%s%s%ls]",
                                   string(m_sampleLayout).c_str(),
                                   HasMBLayout() ? " x " : "",
                                   HasMBLayout() ? GetMBLayout()->GetAxisName() : L"");
}
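
For illustration, a hedged sketch of the strings this helper produces, assuming a 512-element sample layout and an MBLayout whose axis is named "myAxis" (both values are hypothetical):

// Illustrative outputs of ShapeDescription() under the assumptions above:
//   sample layout [512], linked MBLayout named "myAxis"  ->  "[512 x myAxis]"
//   sample layout [512], no MBLayout                     ->  "[512]"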

template <class ElemType>
/*virtual*/ void ComputationNode<ElemType>::DumpNodeInfo(const bool /*printValues*/, const bool printMetadata, File& fstream) const
{

@@ -36,7 +36,8 @@
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // Batch norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_7
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_8
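
A hedged sketch of the loading pattern the new constant enables; this mirrors the InputValue::Load() change later in this commit, and the member name is taken from there:

if (modelVersion >= CNTK_MODEL_VERSION_8)
    fstream >> m_dynamicAxisNodeName; // field only written by v8+ model files
else
    m_dynamicAxisNodeName = L"";      // older files: fall back to the default shared axis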

extern bool g_shareNodeValueMatrices;

@@ -553,9 +554,14 @@ public:

// helper for the factory function for ComputationNodes
static vector<ComputationNodeBasePtr> GetInputsFromConfig(const ScriptableObjects::IConfigRecordPtr configp)
{
    return GetInputsFromConfig(configp, L"inputs");
}

static vector<ComputationNodeBasePtr> GetInputsFromConfig(const ScriptableObjects::IConfigRecordPtr configp, const std::wstring& property)
{
    vector<ComputationNodeBasePtr> inputs;
    const auto* inputsArg = configp->Find(L"inputs");
    const auto* inputsArg = configp->Find(property);
    if (inputsArg)
    {
        if (inputsArg->Is<ComputationNodeBase>()) // single arg

@@ -817,6 +823,9 @@ public:
    return std::wstring(L"Node '") + NodeName().c_str() + L"' (" + OperationName().c_str() + L" operation)";
};

// Helper that returns [a x b x c], including dynamic axes.
const std::string ShapeDescription() const;

protected:

// -----------------------------------------------------------------------

@@ -851,7 +860,8 @@ protected:
typedef ComputationNodeBase::ComputationNodeBasePtr ComputationNodeBasePtr;

// =======================================================================
// NumInputs -- little helper interface to allow derived Node classes to specify how many inputs they expect
// NumInputs -- little helper interface to allow derived Node classes to
// specify how many inputs they expect
// =======================================================================

struct INumInputs { virtual size_t GetExpectedNumInputs() const = 0; };

@@ -864,6 +874,14 @@ struct NumInputs : public INumInputs // e.g. derive from NumInputs<2>
    }
};

// =======================================================================
// Nodes that can take a dynamic axis need to implement this.
// =======================================================================
struct ITakesDynamicAxis
{
    virtual const std::wstring GetRequestedDynamicAxis() const = 0;
};
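
A minimal sketch of an implementer, assuming nothing beyond what the interface requires (InputValueBase later in this commit does essentially this by storing the axis name handed to Init()):

struct MyAxisAwareNode : public ITakesDynamicAxis // hypothetical example type
{
    std::wstring m_axisName; // name of a DynamicAxis node, or L"" for the shared default
    virtual const std::wstring GetRequestedDynamicAxis() const override { return m_axisName; }
};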

// =======================================================================
// ComputationNode -- abstract base class for computation nodes, deriving
// from ComputationNodeBase, parameterized by float vs. double

@@ -1004,7 +1022,7 @@ public:
    if (inputs[i])
        m_inputs[i] = DownCast(inputs[i]); // (DownCast() checks the type; the assignment then downcasts it again)
    else
        m_inputs[i] = nullptr; // during network creation, nullpts are possible
        m_inputs[i] = nullptr; // during network creation, nullptrs are possible
}

protected:

@@ -1406,7 +1424,7 @@ public:
virtual void RequestMatricesBeforeForwardProp(MatrixPool& matrixPool) override
{
    if (IsValueSharable())
        RequestMatrixFromPool(m_value, matrixPool);
    else
        CreateMatrixIfNull(m_value);
}

@@ -108,6 +108,47 @@ public:
    virtual void DumpNodeInfo(const bool printValues, const bool printMetadata, File& fstream) const override;
};

// -----------------------------------------------------------------------
// DynamicAxisNode (/*no input*/)
// This is a holder for MBLayout objects shared across inputs.
// -----------------------------------------------------------------------
template <class ElemType>
class DynamicAxisNode : public ComputationNode<ElemType>, public NumInputs<0>
{
    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"DynamicAxis"; }
public:
    DynamicAxisNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
        // BUGBUG: In BS, the node name is not known during node instantiation.
        // This may require passing the display name as a separate parameter.

        // This is the whole point of this class: Introduce a new MBLayout that others can use.
        LinkToMBLayout(make_shared<MBLayout>(1, 0, name));
        // We need some shape, or validation fails.
        SetDims(TensorShape(1,1), true);
    }
    DynamicAxisNode(const ScriptableObjects::IConfigRecordPtr configp)
        : DynamicAxisNode(configp->Get(L"deviceId"), L"<placeholder>")
    {
    }

    virtual void /*ComputationNode::*/ ForwardProp(const FrameRange&) override
    {
        RuntimeError("%ls is a special node only to be used as input to the Input() node.", NodeDescription().c_str());
    }

    virtual void /*ComputationNode::*/ BackpropTo(const size_t /*inputIndex*/, const FrameRange&)
    {
        LogicError("%ls is a leaf node. BackpropTo() should never be called.", NodeDescription().c_str());
    }
};

template class DynamicAxisNode<float>;
template class DynamicAxisNode<double>;
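
A hedged usage sketch (deviceId is assumed to be defined, and the axis name is hypothetical): the constructor already links a fresh MBLayout, which is exactly what ResetMBLayouts() hands to Input nodes requesting this axis by name:

auto axis = make_shared<DynamicAxisNode<float>>(deviceId, L"myAxis");
assert(axis->HasMBLayout());       // linked in the constructor above
auto layout = axis->GetMBLayout(); // shared by all inputs that request L"myAxis"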

// -----------------------------------------------------------------------
// InputValueBase (/*no input*/)
// Base class for InputValue and SparseInputValue (typically fed by a DataReader)

@@ -116,12 +157,12 @@ public:
// -----------------------------------------------------------------------

template <class ElemType>
class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>
class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>, public ITakesDynamicAxis
{
    typedef ComputationNode<ElemType> Base;
    UsingComputationNodeMembers;

    void Init(const TensorShape& sampleLayout, bool isSparse)
    void Init(const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName)
    {
        m_isSparse = isSparse;
        MarkValueNonSharable();

@@ -131,33 +172,61 @@ class InputValueBase : public ComputationNode<ElemType>, public NumInputs<0>
        SetDims(sampleLayout, HasMBLayout()); // also called when reloading a file. Then we have an MBLayout, otherwise not yet
        UpdateFunctionValuesSize(); // we must allocate the matrix so that the readers get objects with valid row dimensions (some readers expect that)
        SetLearningRateMultiplier(0);
        m_dynamicAxisNodeName = axisName;
    }

protected:
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, bool isSparse)
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, bool isSparse, const std::wstring axisName)
        : Base(deviceId, name)
    {
        Init(sampleLayout, isSparse);
        Init(sampleLayout, isSparse, axisName);
    }
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, bool isSparse)
        : InputValueBase(deviceId, name, TensorShape(rows), isSparse)
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, bool isSparse, const std::wstring axisName)
        : InputValueBase(deviceId, name, TensorShape(rows), isSparse, axisName)
    {
    }
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, bool isSparse)
        : InputValueBase(deviceId, name, TensorShape(), isSparse)
    InputValueBase(DEVICEID_TYPE deviceId, const wstring& name, bool isSparse, const std::wstring axisName)
        : InputValueBase(deviceId, name, TensorShape(), isSparse, axisName)
    {
    }
    InputValueBase(const ScriptableObjects::IConfigRecordPtr configp, bool isSparse)
        : Base(configp->Get(L"deviceId"), L"<placeholder>")
    {
        AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
        wstring axisName = L"";
        // TODO This currently reads a ComputationNode object from a property, thereby bypassing "normal" input handling.
        // The passing of shapes represents a second graph that is "overlaid" (and previously identical) to the data
        // flow network. This needs to be solved on a more fundamental level.
        // The proposed future change from fseide is as follows:
        // (2) On BS level, dynamicAxis is an optional parameter that takes a DynamicAxis object--the alternative,
        // passing a string, will be removed.
        // (3) The dynamicAxis argument will become an actual m_inputs[] to the InputValue. I.e. InputValues are no
        // longer leaves from the ComputationNetwork viewpoint. But they ARE leaves from the user / BS / NDL view, as
        // the axis is not passed as a regular input. This way, the current special-casing can and will be removed;
        // instead, the MBLayout propagation will happen automagically as part of regular ValidateNetwork().
        if (configp->Exists(L"dynamicAxis"))
        {
            auto axisConfig = configp->Find(L"dynamicAxis");
            if (axisConfig->Is<ComputationNodeBase>())
            {
                ComputationNodeBasePtr axis = configp->Get(L"dynamicAxis");
                axisName = axis->GetName();
            }
            else
            {
                axisName = (const std::wstring&)*axisConfig;
            }
        }

        bool isImage = configp->Get(L"isImage");
        if (!isImage)
            Init(configp->Get(L"shape"), isSparse);
            Init(configp->Get(L"shape"), isSparse, axisName);
        else
            Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse);
            Init(ImageDimensions::AsTensorShape(configp->Get(L"imageWidth"), configp->Get(L"imageHeight"), configp->Get(L"imageChannels"), ImageLayoutKindFrom(configp->Get(L"imageLayout"))), isSparse, axisName);
    }

    virtual const std::wstring GetRequestedDynamicAxis() const { return m_dynamicAxisNodeName; }

public:
    virtual void Save(File& fstream) const override
    {

@@ -166,6 +235,10 @@ public:
        size_t colsDummy = 0;
        fstream << rowsDummy << colsDummy;
        m_sampleLayout.Save(fstream);

        unsigned int nrAxes = 1;
        fstream << nrAxes;
        fstream << m_dynamicAxisNodeName;
    }

    virtual void Load(File& fstream, size_t modelVersion) override

@@ -180,10 +253,22 @@ public:
        if (rows != 0 /*old file*/ && rows != sampleLayout.GetNumElements() /*even older file*/)
        {
            fprintf(stderr, "WARNING: %ls InputValue has inconsistent serialized sample layout %s vs. number of rows %d. Resetting sample layout to vector.\n",
                    NodeName().c_str(), string(sampleLayout).c_str(), (int)rows);
            sampleLayout = TensorShape(rows);
        }
        Init(sampleLayout, m_isSparse);

        if (modelVersion >= CNTK_MODEL_VERSION_8)
        {
            unsigned int nrAxes;
            fstream >> nrAxes;
            if (nrAxes == 1)
                fstream >> m_dynamicAxisNodeName;
            else if (nrAxes > 1)
                RuntimeError("Input node: This version only supports a single dynamic axis. Please update your bits.");
        }
        else
            m_dynamicAxisNodeName = L""; // Use default
        Init(sampleLayout, m_isSparse, m_dynamicAxisNodeName);
    }

    // InputValue must not resize its inputs because that might destroy it. It should already have the correct size.

@@ -216,6 +301,9 @@ public:

private:
    bool m_isSparse = false;
    std::wstring m_dynamicAxisNodeName;
    ComputationNodeBase* m_dynamicAxisNode;

    void ConvertToSparseMatrix()
    {
        m_value->SwitchToMatrixType(MatrixType::SPARSE, matrixFormatSparseCSC, false);

@@ -237,15 +325,19 @@ class InputValue : public InputValueBase<ElemType>

public:
    InputValue(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name, false)
        : Base(deviceId, name, false, L"")
    {
    }
    InputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows)
        : Base(deviceId, name, rows, false)
    InputValue(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName)
        : Base(deviceId, name, false, dynamicAxisName)
    {
    }
    InputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout)
        : Base(deviceId, name, sampleLayout, false)
    InputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, const wstring& dynamicAxisName)
        : Base(deviceId, name, rows, false, dynamicAxisName)
    {
    }
    InputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& sampleLayout, const wstring& dynamicAxisName)
        : Base(deviceId, name, sampleLayout, false, dynamicAxisName)
    {
    }
    InputValue(const ScriptableObjects::IConfigRecordPtr configp)
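
A speculative sketch of the new constructors in use; the axis name must match a DynamicAxis node in the network, as enforced by ResetMBLayouts() earlier in this commit (all names and dimensions here are hypothetical):

// Two inputs sharing one dynamic axis named "utterance":
auto features = make_shared<InputValue<float>>(deviceId, L"features", TensorShape(39),  L"utterance");
auto labels   = make_shared<InputValue<float>>(deviceId, L"labels",   TensorShape(132), L"utterance");
// Passing L"" instead selects the legacy network-wide MBLayout.
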
@@ -275,15 +367,19 @@ class SparseInputValue : public InputValueBase<ElemType>

public:
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name, true)
        : Base(deviceId, name, true, L"")
    {
    }
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows)
        : Base(deviceId, name, rows, true)
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName)
        : Base(deviceId, name, true, dynamicAxisName)
    {
    }
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& imageLayout)
        : Base(deviceId, name, imageLayout, true)
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, size_t rows, const wstring& dynamicAxisName)
        : Base(deviceId, name, rows, true, dynamicAxisName)
    {
    }
    SparseInputValue(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& imageLayout, const wstring& dynamicAxisName)
        : Base(deviceId, name, imageLayout, true, dynamicAxisName)
    {
    }
    SparseInputValue(const ScriptableObjects::IConfigRecordPtr configp)

@@ -117,7 +117,7 @@ template <class ElemType>
if (!m_pMBLayout)
{
    m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
    m_pMBLayout->SetUniqueAxisName(NodeName());
    m_pMBLayout->SetUniqueAxisName(L"WhereNodeAxis");
}
// we map scalars to scalars
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)

@@ -157,6 +157,7 @@ template <class ElemType>
        result(0, jIndex) = (ElemType)jSource;
    }
}
// Note: maybe this is no longer needed, now that we do the same inside UpdateFunctionValueSize() for all nodes.
result.CollapseDataLocationAfterWriting(); // BUGBUG: Move back, since BOTH state is broken at present.
}

@@ -171,7 +171,7 @@ template class ReshapeNode<float>;
template class ReshapeNode<double>;

// -----------------------------------------------------------------------
// ReconcileMBLayout (dataInput, layoutInput)
// ReconcileDynamicAxis (dataInput, layoutInput)
// This node copies data from 'dataInput' while it propagates the minibatch-layout information from 'layoutInput'.
// It does perform a runtime check to enforce that the layout of 'dataInput' is compatible (identical content) to that of 'layoutInput'.
// This node is meant to be used from BrainScript macros that bracket expand/reduce pairs of nodes. It is not meant to really be used directly.

@@ -179,14 +179,14 @@ template class ReshapeNode<double>;
// -----------------------------------------------------------------------

template <class ElemType>
class ReconcileMBLayoutNode : public ComputationNode<ElemType>, public NumInputs<2>
class ReconcileDynamicAxisNode : public ComputationNode<ElemType>, public NumInputs<2>
{
    typedef ComputationNode<ElemType> Base; UsingComputationNodeMembersBoilerplate;
    static const std::wstring TypeName() { return L"ReconcileMBLayout"; }
    static const std::wstring TypeName() { return L"ReconcileDynamicAxis"; }

public:
    DeclareConstructorFromConfigWithNumInputs(ReconcileMBLayoutNode);
    ReconcileMBLayoutNode(DEVICEID_TYPE deviceId, const wstring& name)
    DeclareConstructorFromConfigWithNumInputs(ReconcileDynamicAxisNode);
    ReconcileDynamicAxisNode(DEVICEID_TYPE deviceId, const wstring& name)
        : Base(deviceId, name)
    {
    }

|
@ -228,8 +228,8 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
template class ReconcileMBLayoutNode<float>;
|
||||
template class ReconcileMBLayoutNode<double>;
|
||||
template class ReconcileDynamicAxisNode<float>;
|
||||
template class ReconcileDynamicAxisNode<double>;
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// SliceNode (input)
|
||||
|
|
|
@@ -141,6 +141,7 @@
    <ClCompile Include="..\CNTK\BrainScript\BrainScriptEvaluator.cpp" />
    <ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp" />
    <ClCompile Include="..\Common\Config.cpp" />
    <ClCompile Include="..\Common\DataReader.cpp" />
    <ClCompile Include="..\Common\Eval.cpp">
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
      <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">true</ExcludedFromBuild>

@@ -32,6 +32,9 @@
    <ClCompile Include="..\CNTK\BrainScript\BrainScriptParser.cpp">
      <Filter>BrainScript</Filter>
    </ClCompile>
    <ClCompile Include="..\Common\DataReader.cpp">
      <Filter>Common</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="EvalReader.h" />

@@ -12,7 +12,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Evaluation Reader class
// interface to pass to evaluation DLL
template <class ElemType>
class EvalReader : public IDataReader
class EvalReader : public DataReaderBase
{
    std::map<std::wstring, std::vector<ElemType>*>* m_inputs; // our input data
    std::map<std::wstring, size_t>* m_dimensions; // the number of rows for the input data

@@ -109,11 +109,11 @@ public:
    m_mbSize = min(mbSize, m_recordCount);
}

// GetMinibatch - Get the next minibatch (features and labels)
// TryGetMinibatch - Get the next minibatch (features and labels)
// matrices - [in] a map with named matrix types (i.e. 'features', 'labels') mapped to the corresponding matrix,
//            [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
virtual bool GetMinibatch(StreamMinibatchInputs& matrices)
virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    // how many records are we reading this time
    size_t recordCount = min(m_mbSize, m_recordCount - m_currentRecord);

@@ -664,7 +664,7 @@ CPUMatrix<ElemType>& CPUMatrix<ElemType>::DoGatherColumnsOf(ElemType beta, const
        continue;
    size_t jIn = (size_t)jInF;
    if (jIn >= a.GetNumCols())
        InvalidArgument("DoGatherColumnsOf: Map out of bounds.");
        InvalidArgument("DoGatherColumnsOf: Map out of bounds. %ld >= %ld", (long int)jIn, (long int)a.GetNumCols());
    ScaleAndAddColumn(beta, &us(0,jOut), &a(0,jIn), us.GetNumRows(), alpha);
}

@@ -6091,7 +6091,7 @@ void CPUMatrix<ElemType>::TensorOp(ElemType beta, const CPUMatrix<ElemType>& a,
if (reductionOp != ElementWiseOperator::opSum) // TODO: enable the reduction ops
    InvalidArgument("TensorOp: Unary reduction operations other than opSum not yet implemented.");

// TODO: Change the lambda to take a pointer and a number of elements, so that we can pass it 1 or 4 elements, in order for it to SSE-vectorize.
#define CaseUnaryTensorOp(oper)                                                        \
    case ElementWiseOperator::op##oper:                                                \
        return TensorOpWithFn(beta, pointers, alpha, [](const array<ElemType*, 2>& pp) \

@@ -893,7 +893,7 @@ __global__ void _doGatherColumnsOf(ElemType* us, size_t usStride, const ElemType

    const ElemType& ra = a[ i + jIn * aStride ];
    ElemType& rus = us[id/*i + jOut * usStride*/];

    ElemType res = ra * alpha;
    if (beta != 0)
        res += rus * beta;

@@ -245,7 +245,7 @@ bool BinaryReader<ElemType>::CheckEndDataset(size_t actualmbsize)
//            [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool BinaryReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BinaryReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    // get out if they didn't call StartMinibatchLoop() first
    if (m_mbSize == 0)

@@ -541,7 +541,7 @@ public:
};

template <class ElemType>
class BinaryReader : public IDataReader
class BinaryReader : public DataReaderBase
{
    size_t m_mbSize;        // size of minibatch requested
    size_t m_mbStartSample; // starting sample # of the next minibatch

@@ -587,7 +587,7 @@ public:
    }
    virtual ~BinaryReader();
    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

    size_t GetNumParallelSequences()
    {

@@ -140,7 +140,7 @@ void Indexer::Build()
size_t id = 0;
int64_t offset = GetFileOffset();
// read the very first sequence id
if (!GetNextSequenceId(id))
if (!TryGetSequenceId(id))
{
    RuntimeError("Expected a sequence id at the offset %" PRIi64 ", none was found.", offset);
}

@@ -156,7 +156,7 @@ void Indexer::Build()
offset = GetFileOffset(); // a new line starts at this offset;
sd.m_numberOfSamples++;

if (!m_done && GetNextSequenceId(id) && id != sd.m_id)
if (!m_done && TryGetSequenceId(id) && id != sd.m_id)
{
    // found a new sequence, which starts at the [offset] bytes into the file
    sd.m_byteSize = offset - sd.m_fileOffsetBytes;

@@ -192,7 +192,7 @@ void Indexer::SkipLine()
    }
}

bool Indexer::GetNextSequenceId(size_t& id)
bool Indexer::TryGetSequenceId(size_t& id)
{
    bool found = false;
    id = 0;

@@ -71,7 +71,7 @@ private:
// EOF is reached without hitting the pipe character.
// Returns false if no numerical characters are found preceding the pipe.
// Otherwise, writes sequence id value to the provided reference, returns true.
bool GetNextSequenceId(size_t& id);
bool TryGetSequenceId(size_t& id);

// Build a chunk/sequence index, treating each line as an individual sequence.
// Does not do any sequence parsing, instead uses line number as

@@ -332,7 +332,7 @@ void TextParser<ElemType>::IncrementNumberOfErrorsOrDie()
}

template <class ElemType>
bool TextParser<ElemType>::RefillBuffer()
bool TextParser<ElemType>::TryRefillBuffer()
{
    size_t bytesRead = fread(m_buffer.get(), 1, BUFFER_SIZE, m_file);

@@ -364,7 +364,7 @@ void TextParser<ElemType>::SetFileOffset(int64_t offset)
m_fileOffsetStart = offset;
m_fileOffsetEnd = offset;

RefillBuffer();
TryRefillBuffer();
}

template <class ElemType>

@@ -384,7 +384,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
if (verifyId)
{
    size_t id;
    if (!ReadUint64(id, bytesToRead) || id != sequenceDsc.m_id)
    if (!TryReadUint64(id, bytesToRead) || id != sequenceDsc.m_id)
    {
        RuntimeError("Did not find the expected sequence id ( %" PRIu64 ") "
                     " at the file offset = %" PRId64 "\n", sequenceDsc.m_id, GetFileOffset());

@@ -410,7 +410,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
size_t numRowsRead = 0, expectedRowCount = sequenceDsc.m_numberOfSamples;
for (size_t i = 0; i < expectedRowCount; i++)
{
    if ((ReadRow(sequence, bytesToRead)))
    if ((TryReadRow(sequence, bytesToRead)))
    {
        ++numRowsRead;
    }

@@ -472,7 +472,7 @@ typename TextParser<ElemType>::SequenceBuffer TextParser<ElemType>::LoadSequence
}

template <class ElemType>
bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadRow(SequenceBuffer& sequence, size_t& bytesToRead)
{
    bool found = false;
    while (bytesToRead && CanRead())

@@ -496,7 +496,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
}

size_t id;
if (!GetInputId(id, bytesToRead))
if (!TryGetInputId(id, bytesToRead))
{
    IncrementNumberOfErrorsOrDie();
    SkipToNextInput(bytesToRead);

@@ -511,7 +511,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
vector<ElemType>& values = data->m_buffer;
size_t size = values.size();
assert(size % stream.m_sampleDimension == 0);
if (!ReadDenseSample(values, stream.m_sampleDimension, bytesToRead))
if (!TryReadDenseSample(values, stream.m_sampleDimension, bytesToRead))
{
    // expected a dense sample, but was not able to fully read it, ignore it.
    if (values.size() != size)

@@ -533,7 +533,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
vector<IndexType>& indices = data->m_indices;
assert(values.size() == indices.size());
size_t size = values.size();
if (!ReadSparseSample(values, indices, bytesToRead))
if (!TryReadSparseSample(values, indices, bytesToRead))
{
    // expected a sparse sample, but something went south, ignore it.
    if (values.size() != size)

@@ -572,7 +572,7 @@ bool TextParser<ElemType>::ReadRow(SequenceBuffer& sequence, size_t& bytesToRead
}

template <class ElemType>
bool TextParser<ElemType>::GetInputId(size_t& id, size_t& bytesToRead)
bool TextParser<ElemType>::TryGetInputId(size_t& id, size_t& bytesToRead)
{
    char* scratchIndex = m_scratch.get();

@@ -664,7 +664,7 @@ bool TextParser<ElemType>::GetInputId(size_t& id, size_t& bytesToRead)
}

template <class ElemType>
bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadDenseSample(vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead)
{
    size_t counter = 0;
    ElemType value;

@@ -708,7 +708,7 @@ bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t samp
    continue;
}

if (!ReadRealNumber(value, bytesToRead))
if (!TryReadRealNumber(value, bytesToRead))
{
    // bail out.
    return false;

@@ -730,7 +730,7 @@ bool TextParser<ElemType>::ReadDenseSample(vector<ElemType>& values, size_t samp
}

template <class ElemType>
bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead)
{
    size_t index;
    ElemType value;

@@ -755,7 +755,7 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
}

// read next sparse index
if (!ReadUint64(index, bytesToRead))
if (!TryReadUint64(index, bytesToRead))
{
    // bail out.
    return false;

@@ -771,6 +771,17 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
    // bail out.
    return false;
}
if (index > numeric_limits<IndexType>::max())
{
    if (m_traceLevel >= Warning)
    {
        fprintf(stderr,
                "WARNING: sparse index value(%" PRIu64 ") exceeds the maximum allowed "
                " value (%" PRIu64 ")\n", index, (size_t)numeric_limits<IndexType>::max());
    }
    // bail out.
    return false;
}

// an index must be followed by a delimiter
c = *m_pos;

@@ -792,7 +803,7 @@ bool TextParser<ElemType>::ReadSparseSample(std::vector<ElemType>& values, std::
}

// read the corresponding value
if (!ReadRealNumber(value, bytesToRead))
if (!TryReadRealNumber(value, bytesToRead))
{
    // bail out.
    return false;

@@ -847,7 +858,7 @@ void TextParser<ElemType>::SkipToNextInput(size_t& bytesToRead)
}

template <class ElemType>
bool TextParser<ElemType>::ReadUint64(size_t& value, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadUint64(size_t& value, size_t& bytesToRead)
{
    value = 0;
    bool found = false;

@@ -900,7 +911,7 @@ bool TextParser<ElemType>::ReadUint64(size_t& value, size_t& bytesToRead)
// cannot be parsed as part of a floating point number.
// Returns true if parsing was successful.
template <class ElemType>
bool TextParser<ElemType>::ReadRealNumber(ElemType& value, size_t& bytesToRead)
bool TextParser<ElemType>::TryReadRealNumber(ElemType& value, size_t& bytesToRead)
{
    State state = State::Init;
    double coefficient = .0, number = .0, divider = .0;

@@ -124,28 +124,28 @@ private:
void SkipToNextValue(size_t& bytesToRead);
void SkipToNextInput(size_t& bytesToRead);

bool RefillBuffer();
bool TryRefillBuffer();

int64_t GetFileOffset() const { return m_fileOffsetStart + (m_pos - m_bufferStart); }

// Reads an alias/name and converts it to an internal stream id (= stream index).
bool GetInputId(size_t& id, size_t& bytesToRead);
bool TryGetInputId(size_t& id, size_t& bytesToRead);

bool ReadRealNumber(ElemType& value, size_t& bytesToRead);
bool TryReadRealNumber(ElemType& value, size_t& bytesToRead);

bool ReadUint64(size_t& value, size_t& bytesToRead);
bool TryReadUint64(size_t& value, size_t& bytesToRead);

// Reads dense sample values into the provided vector.
bool ReadDenseSample(std::vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead);
bool TryReadDenseSample(std::vector<ElemType>& values, size_t sampleSize, size_t& bytesToRead);

// Reads sparse sample values and corresponding indices into the provided vectors.
bool ReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead);
bool TryReadSparseSample(std::vector<ElemType>& values, std::vector<IndexType>& indices, size_t& bytesToRead);

// Reads one whole row (terminated by a row delimiter) of samples
bool ReadRow(SequenceBuffer& sequence, size_t& bytesToRead);
bool TryReadRow(SequenceBuffer& sequence, size_t& bytesToRead);

// Returns true if there's still data available.
bool inline CanRead() { return m_pos != m_bufferEnd || RefillBuffer(); }
bool inline CanRead() { return m_pos != m_bufferEnd || TryRefillBuffer(); }

// Given a descriptor, retrieves the data for the corresponding sequence from the file.
SequenceBuffer LoadSequence(bool verifyId, const SequenceDescriptor& descriptor);
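
A brief sketch of the calling convention behind the Try* renaming in this file: these parsers report failure through their return value rather than throwing, leaving the error policy to the caller, as TryReadRow() above already does:

size_t id;
if (!TryGetInputId(id, bytesToRead))  // returns false on malformed input
{
    IncrementNumberOfErrorsOrDie();   // caller decides: count the error or abort
    SkipToNextInput(bytesToRead);     // resynchronize and continue parsing
}
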
@@ -320,7 +320,7 @@ void DSSMReader<ElemType>::StoreLabel(ElemType& labelStore, const LabelType& lab
//            [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool DSSMReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool DSSMReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    if (m_readNextSample >= m_totalSamples)
    {

@@ -64,7 +64,7 @@ public:
};

template <class ElemType>
class DSSMReader : public IDataReader
class DSSMReader : public DataReaderBase
{
    // public:
    //    typedef std::string LabelType;

@@ -159,7 +159,7 @@ public:
    }
    virtual ~DSSMReader();
    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

    size_t GetNumParallelSequences()
    {

@@ -931,7 +931,7 @@ bool HTKMLFReader<ElemType>::GetHmmData(msra::asr::simplesenonehmm* hmm)
// returns - true if there are more minibatches, false if no more minibatches remain
// TODO: Why do we have two read functions? Is one not a superset of the other?
template <class ElemType>
bool HTKMLFReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool HTKMLFReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    if (m_trainOrTest)
    {

@@ -21,7 +21,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {

template <class ElemType>
class HTKMLFReader : public IDataReader
class HTKMLFReader : public DataReaderBase
{
private:
    const static size_t m_htkRandomizeAuto = 0;

@@ -184,7 +184,7 @@ public:

    virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;

    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
    virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
    virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
    virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0);

@@ -64,11 +64,15 @@
      <PreprocessorDefinitions>WIN32;_DEBUG;_WINDOWS;_USRDLL;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <SDLCheck>true</SDLCheck>
      <TreatWarningAsError>true</TreatWarningAsError>
      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
      <AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="$(ReleaseBuild)">

@@ -82,6 +86,8 @@
      <SDLCheck>true</SDLCheck>
      <AdditionalOptions>/d2Zi+ %(AdditionalOptions)</AdditionalOptions>
      <TreatWarningAsError>true</TreatWarningAsError>
      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
      <AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">..\..\common\include;..\..\Math</AdditionalIncludeDirectories>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>

@@ -90,6 +96,8 @@
      <OptimizeReferences>true</OptimizeReferences>
      <AdditionalDependencies>Math.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies)</AdditionalDependencies>
      <Profile>true</Profile>
      <AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
      <AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release_CpuOnly|x64'">$(SolutionDir)$(Platform)\$(Configuration)\</AdditionalLibraryDirectories>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>

@@ -115,6 +123,7 @@
    <ClInclude Include="utterancesourcemulti.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\..\Common\DataReader.cpp" />
    <ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
    <ClCompile Include="..\..\Common\TimerUtility.cpp">
      <PrecompiledHeader>NotUsing</PrecompiledHeader>

@@ -15,6 +15,9 @@
    <ClCompile Include="..\..\Common\ExceptionWithCallStack.cpp" />
    <ClCompile Include="Exports.cpp" />
    <ClCompile Include="DataWriterLocal.cpp" />
    <ClCompile Include="..\..\Common\DataReader.cpp">
      <Filter>Common\Include</Filter>
    </ClCompile>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="biggrowablevectors.h" />

@@ -846,7 +846,7 @@ void HTKMLFReader<ElemType>::StartMinibatchLoopToWrite(size_t mbSize, size_t /*e
//            [out] each matrix resized if necessary containing data.
// returns - true if there are more minibatches, false if no more minibatches remain
template <class ElemType>
bool HTKMLFReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool HTKMLFReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    if (m_trainOrTest)
    {

@@ -13,7 +13,7 @@
namespace Microsoft { namespace MSR { namespace CNTK {

template <class ElemType>
class HTKMLFReader : public IDataReader
class HTKMLFReader : public DataReaderBase
{
private:
    msra::dbn::minibatchiterator* m_mbiter;

@@ -186,7 +186,7 @@ public:
    }
    virtual ~HTKMLFReader();
    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);
    virtual const std::map<LabelIdType, LabelType>& GetLabelMapping(const std::wstring& sectionName);
    virtual void SetLabelMapping(const std::wstring& sectionName, const std::map<LabelIdType, LabelType>& labelMapping);
    virtual bool GetData(const std::wstring& sectionName, size_t numRecords, void* data, size_t& dataBufferSize, size_t recordStart = 0);

@@ -1138,7 +1138,7 @@ void SequenceReader<ElemType>::GetClassInfo()
}

template <class ElemType>
bool SequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool SequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    FailBecauseDeprecated(__FUNCTION__); // DEPRECATED CLASS, SHOULD NOT BE USED ANYMORE

@@ -1889,7 +1889,7 @@ bool BatchSequenceReader<ElemType>::GetMinibatchData(size_t& /*out*/ firstPosInS
// - up to N sequences of the same length are returned in each MB
// - minibatches consist of sequences of the same length only (no gaps)
template <class ElemType>
bool BatchSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BatchSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    // get out if they didn't call StartMinibatchLoop() first
    // TODO: Why not fail here?

@@ -2023,7 +2023,7 @@ bool BatchSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices
   timePos: the time position. for example, 100 actual minibatch with 10 streams,
   timePosition = [0,..,9] for each actual time
*/
// This function was only called from BatchSequenceReader::GetMinibatch(), but no longer.
// This function was only called from BatchSequenceReader::TryGetMinibatch(), but no longer.
template <class ElemType>
void BatchSequenceReader<ElemType>::SetSentenceBegin(int wrd, int uttPos, int timePos)
{

@@ -109,7 +109,7 @@ public:

// Note: This class is deprecated for standalone use, only used as a base for BatchSequenceReader which overrides most of the functions.
template <class ElemType>
class SequenceReader : public IDataReader
class SequenceReader : public DataReaderBase
{
protected:
    bool m_idx2clsRead;

@@ -276,7 +276,7 @@ public:
    }
    virtual ~SequenceReader();
    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

    // void SetSentenceSegBatch(std::vector<size_t> &/*sentenceEnd*/) {};
    // TODO: ^^ should this be void CopyMBLayoutTo(MBLayoutPtr pMBLayout);

@@ -407,7 +407,7 @@ private:

public:
    void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize) override;
    bool GetMinibatch(StreamMinibatchInputs& matrices) override;
    bool TryGetMinibatch(StreamMinibatchInputs& matrices) override;
    bool DataEnd() override;

    void CopyMBLayoutTo(MBLayoutPtr pMBLayout) { assert(mToProcess.size() == m_pMBLayout->GetNumParallelSequences()); pMBLayout->CopyFrom(m_pMBLayout); }

@@ -817,7 +817,7 @@ void BatchLUSequenceReader<ElemType>::SetNumParallelSequences(const size_t mz)
}

template <class ElemType>
bool BatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool BatchLUSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    // get out if they didn't call StartMinibatchLoop() first
    // TODO: Why is this allowed? Why not terminate?

@@ -881,12 +881,12 @@ bool BatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matric
{
    assert(idx == (LabelIdType) NULLLABEL); // TODO: what other conditions?
    // if (!m_pMBLayout->IsGap(s, t)) // verify that these are marked as NoInput
    //     LogicError("BatchLUSequenceReader::GetMinibatch observation is larger than its dimension but no_labels sign is not used to indicate that this observation has no labels. Possible reason is a bug in EnsureDataAvailable or a bug here.");
    //     LogicError("BatchLUSequenceReader::TryGetMinibatch observation is larger than its dimension but no_labels sign is not used to indicate that this observation has no labels. Possible reason is a bug in EnsureDataAvailable or a bug here.");
    continue;
}

// if (m_pMBLayout->IsGap(s, t)) // verify that these are marked as NoInput
//     LogicError("BatchLUSequenceReader::GetMinibatch: Inconsistent NoInput flag");
//     LogicError("BatchLUSequenceReader::TryGetMinibatch: Inconsistent NoInput flag");

locObs.SetValue(idx + jj * featInfo.dim, j, (ElemType) 1);
}

@@ -1171,7 +1171,7 @@ template class BatchLUSequenceReader<double>;
template class BatchLUSequenceReader<float>;

template <class ElemType>
bool MultiIOBatchLUSequenceReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool MultiIOBatchLUSequenceReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    // on first iteration, need to check if all requested data matrices are available
    std::map<std::wstring, size_t>::iterator iter;

@@ -47,7 +47,7 @@ enum ReaderMode
};

template <class ElemType>
class LUSequenceReader : public IDataReader
class LUSequenceReader : public DataReaderBase
{
protected:
    bool m_idx2clsRead;

@@ -319,7 +319,7 @@ public:
    size_t GetLabelOutput(StreamMinibatchInputs& matrices, LabelInfo& labelInfo, size_t actualmbsize);

    void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    bool GetMinibatch(StreamMinibatchInputs& matrices);
    bool TryGetMinibatch(StreamMinibatchInputs& matrices);

    bool EnsureDataAvailable(size_t mbStartSample);
    size_t GetNumParallelSequences();

@@ -411,7 +411,7 @@ public:
    }
};

bool GetMinibatch(StreamMinibatchInputs& matrices);
bool TryGetMinibatch(StreamMinibatchInputs& matrices);

void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples);

@@ -780,7 +780,7 @@ void LibSVMBinaryReader<ElemType>::DoDSSMMatrix(Matrix<ElemType>& mat, size_t ac
}

template <class ElemType>
bool LibSVMBinaryReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
bool LibSVMBinaryReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
{
    //timer = clock();
#if DEBUG

@@ -226,7 +226,7 @@ private:
};

template <class ElemType>
class LibSVMBinaryReader : public IDataReader
class LibSVMBinaryReader : public DataReaderBase
{
public:
    virtual void Init(const ConfigParameters& config) override

@@ -254,7 +254,7 @@ public:

    virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
    virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples) override;
    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

    virtual bool SupportsDistributedMBRead() const override
    {

@@ -158,9 +158,10 @@ Minibatch BpttPacker::ReadMinibatch()
for (size_t streamIndex = 0; streamIndex < m_outputStreamDescriptions.size(); ++streamIndex)
{
    m_currentLayouts[streamIndex]->Init(m_numParallelSequences, m_truncationSize);
    size_t sequenceId = 0;
    for (size_t slotIndex = 0; slotIndex < m_numParallelSequences; ++slotIndex)
    {
        PackSlot(streamIndex, slotIndex);
        PackSlot(streamIndex, slotIndex, sequenceId);
    }

    StreamMinibatchPtr m = make_shared<StreamMinibatch>();

@@ -173,7 +174,7 @@ Minibatch BpttPacker::ReadMinibatch()
}

// Packs a slot of sequences into the minibatch.
void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)
void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId)
{
    auto& slot = m_sequenceBufferPerStream[streamIndex]->m_slots[slotIndex];

@@ -204,7 +205,7 @@ void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)

// Add current sequence to the minibatch layout.
m_currentLayouts[streamIndex]->AddSequence(
    NEW_SEQUENCE_ID,
    sequenceId++,
    slotIndex,
    -(int)slot.m_sampleCursor,
    slot.FrontSequence()->m_numberOfSamples - slot.m_sampleCursor);

@@ -220,7 +221,7 @@ void BpttPacker::PackSlot(size_t streamIndex, size_t slotIndex)

// Adding next sequence to the minibatch.
m_currentLayouts[streamIndex]->AddSequence(
    NEW_SEQUENCE_ID,
    sequenceId++,
    slotIndex,
    currentTimestep,
    currentTimestep + slot.FrontSequence()->m_numberOfSamples);

@@ -36,7 +36,11 @@ private:
void ReadSequencesToSlot(size_t slotIndex);

// Packs a slot into the data buffer.
void PackSlot(size_t streamIndex, size_t slotIndex);
// SequenceId specifies the starting value to be used as sequence identifier.
// For each new input, sequence id is reset to 0, and incremented each time
// a sequence is added to the layout. This allows layouts corresponding to different
// inputs to have consistent sequence ids.
void PackSlot(size_t streamIndex, size_t slotIndex, size_t& sequenceId);
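
A hedged sketch of the intended call pattern, mirroring the ReadMinibatch() loop earlier in this commit: the caller owns the counter, so ids restart at 0 for each input stream and stay aligned across the streams' layouts:

size_t sequenceId = 0;                            // reset once per input stream
for (size_t slotIndex = 0; slotIndex < m_numParallelSequences; ++slotIndex)
    PackSlot(streamIndex, slotIndex, sequenceId); // incremented per added sequence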

virtual MBLayoutPtr CreateMBLayout(const StreamBatch& batch)
{

@@ -34,9 +34,9 @@ protected:
};

PackerBase(MemoryProviderPtr memoryProvider,
           TransformerPtr transformer,
           size_t minibatchSize,
           const std::vector<StreamDescriptionPtr>& streams);

typedef std::vector<SequenceDataPtr> StreamBatch;

@@ -22,9 +22,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {

template <class ElemType>
ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
    : m_layout(make_shared<MBLayout>()), m_factory(factory)
    : m_factory(factory)
{
    m_layout->SetUniqueAxisName(L"ReaderShim");
}

template <class ElemType>

@@ -38,8 +37,7 @@ void ReaderShim<ElemType>::Init(const ConfigParameters& config)
// otherwise deferring - synchronous execution during .get() call
m_launchType = prefetch ? launch::async : launch::deferred;

auto numSeqsPerMBForAllEpochs = numberOfuttsPerMinibatchForAllEpochs;
m_layout->Init(numSeqsPerMBForAllEpochs[0], 0);
m_numParallelSequences = numberOfuttsPerMinibatchForAllEpochs[0];

m_reader = m_factory(config);
m_streams = m_reader->GetStreamDescriptions();

@@ -105,7 +103,6 @@ string EnumerateInputs(const map<wstring, size_t> &nameToStreamId)
template <class ElemType>
bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
{
    // TODO: verify that the set of matrix names is identical
    // to the set of reader input names. Warn if it's a subset, throw
    // if it's a superset.

@@ -133,6 +130,15 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
    }
}

// Reset stale mb layouts.
// BUGBUG: This seems incorrect. (1) layouts should all be updated below, and (2) some of these layouts are the same, we are resetting them twice.
for (const auto& iter : matrices)
{
    iter.second.pMBLayout->Init(1, 0);
}

// a map to generate error messages when checking layout constraints.
map<wstring, wstring> layoutToInputMap;
if (!minibatch.m_data.empty())
{
    // TODO: Use alternating pinned buffer in the packer, do not copy anything, but pack into the pinned memory.

@@ -147,9 +153,31 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
         }

         size_t streamId = m_nameToStreamId[mx.first];

         const auto& stream = minibatch.m_data[streamId];
-        m_layout = stream->m_layout;
+        m_numParallelSequences = stream->m_layout->GetNumParallelSequences();
+
+        // This assert no longer holds - different inputs have different sequence lengths, resulting in different number
+        // of parallel samples.
+        // assert(m_numParallelSequences == minibatch.m_data.front()->m_layout->GetNumParallelSequences());
+
+        auto& layout = mx.second.pMBLayout;
+
+        if (layout->GetNumCols() == 0)
+        {
+            // layout is empty, copy layout info from the reader
+            layout->CopyFrom(stream->m_layout, /*keepName*/ true);
+            layoutToInputMap[layout->GetAxisName()] = mx.first;
+        }
+        else if (*layout != *stream->m_layout) // this does a deep value-level comparison
+        {
+            RuntimeError("Dynamic axis layout '%ls' is shared between inputs '%ls' and '%ls', but layouts generated "
+                "from the input data are incompatible on this axis. Are you using different sequence lengths? "
+                "Did you consider adding a DynamicAxis() to the Input nodes?",
+                layout->GetAxisName(), layoutToInputMap[layout->GetAxisName()].c_str(), mx.first.c_str());
+        }

         size_t sampleSize = m_streams[streamId]->m_sampleLayout->GetNumElements();
         auto& matrix = matrices.GetInputMatrix<ElemType>(mx.first);
         FillMatrixFromStream(m_streams[streamId]->m_storageType, &matrix, sampleSize, stream);
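The DynamicAxis() suggestion in the error message above is exactly what the seqcla.cntk sample added later in this commit demonstrates: it declares t = DynamicAxis(), binds it with features = Input(1, dynamicAxis=t), and calls ReconcileDynamicAxis(l3, labels) so that inputs with different sequence structure do not end up sharing one incompatible layout.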
@@ -200,13 +228,21 @@ bool ReaderShim<ElemType>::DataEnd() { return false; } // Note: Return value nev
 template <class ElemType>
 void ReaderShim<ElemType>::CopyMBLayoutTo(MBLayoutPtr layout)
 {
-    layout->CopyFrom(m_layout);
+    // This method is inherited from IDataReader and should be removed in the near future.
+    NOT_IMPLEMENTED;
 }

 template <class ElemType>
 size_t ReaderShim<ElemType>::GetNumParallelSequences()
 {
-    return m_layout->GetNumParallelSequences();
+    // BUGBUG This is a property of the stream, of which this reader might produce several, with different nr. of
+    // parallel sequences. Thus this property doesn't make sense anymore.
+    // This method is called by
+    // * DataReaderHelpers::GetNumSubminibatchesNeeded to estimate mb size
+    // * ComputationNetwork::SetBatchNormalizationTimeConstants to compute learning rate per sample
+    // * ComputationNetwork::SetBatchNormalizationTimeConstants to compute actual mb size and momentum per sample
+    // * SGD::AdaptiveMinibatchSizing to compute learning rate per sample
+    return m_numParallelSequences;
 }

 template class ReaderShim<float>;
@@ -58,7 +58,7 @@ private:
     ReaderFactory m_factory;
     bool m_endOfEpoch;

-    MBLayoutPtr m_layout;
+    size_t m_numParallelSequences;

     std::map<std::wstring, size_t> m_nameToStreamId;
     std::vector<StreamDescriptionPtr> m_streams;
@@ -207,7 +207,7 @@ void SparsePCReader<ElemType>::StartMinibatchLoop(size_t mbSize, size_t /*epoch*
 // [out] each matrix resized if necessary containing data.
 // returns - true if there are more minibatches, false if no more minibatches remain
 template <class ElemType>
-bool SparsePCReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
+bool SparsePCReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
 {
     // get out if they didn't call StartMinibatchLoop() first
     if (m_miniBatchSize == 0)
@@ -21,7 +21,7 @@
 namespace Microsoft { namespace MSR { namespace CNTK {

 template <class ElemType>
-class SparsePCReader : public IDataReader
+class SparsePCReader : public DataReaderBase
 {
     ConfigParameters m_readerConfig;
     std::wstring m_file;
@@ -76,7 +76,7 @@ public:
         InitFromConfig(config);
     }
     virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, size_t requestedEpochSamples = requestDataSize);
-    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
+    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

     size_t GetNumParallelSequences()
     {
@@ -765,7 +765,7 @@ void UCIFastReader<ElemType>::StoreLabel(ElemType& labelStore, const LabelType&
 // [out] each matrix resized if necessary containing data.
 // returns - true if there are more minibatches, false if no more minibatches remain
 template <class ElemType>
-bool UCIFastReader<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
+bool UCIFastReader<ElemType>::TryGetMinibatch(StreamMinibatchInputs& matrices)
 {
     bool minibatchesRemaining = true;
     if (m_pendingAsyncGetMinibatch.valid())
@@ -36,7 +36,7 @@ enum LabelKind
 };

 template <class ElemType>
-class UCIFastReader : public IDataReader
+class UCIFastReader : public DataReaderBase
 {
     shared_ptr<UCIParser<ElemType, LabelType>> m_parser;
     size_t m_mbSize; // size of minibatch requested
@@ -151,7 +151,7 @@ public:

     virtual void StartDistributedMinibatchLoop(size_t mbSize, size_t epoch, size_t subsetNum, size_t numSubsets, size_t requestedEpochSamples = requestDataSize) override;

-    virtual bool GetMinibatch(StreamMinibatchInputs& matrices);
+    virtual bool TryGetMinibatch(StreamMinibatchInputs& matrices);

     bool GetMinibatchImpl(StreamMinibatchInputs& matrices);
@@ -34,9 +34,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     const MPIWrapperPtr& mpi)
 {
     // Reading consists of a sequence of Reader API calls:
-    // - GetMinibatch() --fills the inputMatrices
+    // - GetMinibatch() --fills the inputMatrices and copies the MBLayout from Reader into inputMatrices
     // - SetActualMiniBatchSizeFromFeatures() --tells Network to resize the nodes' buffers
-    // - CopyMBLayoutTo() --copies the MBLayout from Reader to Network
     // with the special twist that in presence of parallelization, there is some decimation involved.

     bool wasDataRead = trainSetDataReader.GetMinibatch(inputMatrices); // fill in the minibatch data into the Input nodes' buffers directly
@@ -61,13 +60,23 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         trainSetDataReader.GetMinibatch4SE(*latticeinput, *uids, *boundaries, *extrauttmap);
     }

-    // get layout meta-data
-    // BUGBUG (Issue #95): must be adapted for multiple MBLayouts
-    trainSetDataReader.CopyMBLayoutTo(net->GetMBLayoutPtrOfNetwork());

+    // TODO: move this into shim for the old readers.
     // decimate if needed. Decimation happens in-place.
+    // This is only allowed for old readers, which support a single layout for all inputs.
     if (!useDistributedMBReading && useParallelTrain)
-        DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), net->GetMBLayoutPtrOfNetwork());
+    {
+        auto& pMBLayout = net->GetMBLayoutPtrOfNetwork();
+
+        // Verify that there's indeed a single layout
+        for (const auto& iter : inputMatrices)
+        {
+            assert(iter.second.pMBLayout == pMBLayout);
+            // TODO: This must be a runtime check, not an assert().
+            UNUSED(iter);
+        }
+
+        DecimateMinibatchInPlace<ElemType>(inputMatrices, mpi->NumNodesInUse(), mpi->CurrentNodeRank(), pMBLayout);
+    }

     // reader will have resized input node's m_value directly. Nodes must be notified to do necessary internal state updates from that.
     // TODO: This is a stopgap. SGD will at some point change from sets of matrices to sets of nodes. Then this will become much simpler.
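One possible shape for the runtime check the TODO above asks for. This is a sketch, not part of the commit; it reuses the RuntimeError helper seen in the ReaderShim changes, and assumes iter.first is the input's wide-string name as in StreamMinibatchInputs:

    // Sketch: fail loudly instead of assert()ing when an input carries its own layout.
    for (const auto& iter : inputMatrices)
    {
        if (iter.second.pMBLayout != pMBLayout)
            RuntimeError("Decimation requires all inputs to share the network's single MBLayout, "
                "but input '%ls' has a different one.", iter.first.c_str());
    }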
@@ -139,7 +148,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         // decimatedMB[name]->SetValue(mat.Reshaped(nRows*nSequence, nT).RowSlice( st*nRows , (en-st)*nRows).Reshaped(nRows, nNewParallelSequence*nT));
     }
     // decimate MBLayout as well
-    pDecimateMBLayout = make_shared<MBLayout>(numNewParallelSequence, nT);
+    pDecimateMBLayout = make_shared<MBLayout>(numNewParallelSequence, nT, L"");
     pDecimateMBLayout->SetAxisName(pMBLayout->GetAxisName());
 #if 1
     // now copy over all sequence info records that are inside the range, with adjusted 's'
@@ -0,0 +1,129 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license. See LICENSE file in the project root for full license information.

RootDir = ".."

ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"

command=Train #:Write
deviceId = $DeviceId$
modelPath="$ModelDir$/seqcla.dnn"

Train=[
    action="train"
    run=BrainScriptNetworkBuilder

    BrainScriptNetworkBuilder=[
        Macros = [
            // define "last hidden state of sequence" in the LSTM (really for any sequence though)
            TakeRight (N, x) = BS.Sequences._Take(FutureValue, N, x)
            Last(x) = TakeRight(1, x)
        ]
        Layers = [
            EmbeddingLayer(input, vocabSize, embeddingDim, embeddingPath) = [
                embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
                lookup = GatherPacked(features, embedding)
            ].lookup
            DenseLayer(input, inputSize, outputSize, activation) = [
                z = BFF(input, outputSize, inputSize).z
                act = activation(z)
            ].act
            LSTMLayer(input, inputSize, outputSize, cellSize, selector) = [
                lstm = BS.RNNs.RecurrentLSTMP(inputSize, outputSize, cellSize, input)
                result = selector(lstm)
            ].result
        ]

        // LSTM params
        lstmDim = 25
        cellDim = 25

        // model
        numLabels = 5
        vocab = 2000
        embedDim = 50

        // set up features and labels
        t = DynamicAxis()
        features = Input(1, dynamicAxis=t)  # Input has shape (1,t)
        labels = Input(numLabels)           # Input has shape (numLabels,*) where all sequences in *=1

        // load the pre-learned word embedding matrix
        l1 = Layers.EmbeddingLayer(features, vocab, embedDim, 'embeddingmatrix.txt')
        l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, Macros.Last)
        l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
        out = Pass(l3, tag='output')

        // Make sure the trainer understands that the time dimension of l3 is actually the same as that of labels.
        l3p = ReconcileDynamicAxis(l3, labels)

        // training criteria
        ce  = CrossEntropyWithSoftmax(labels, l3p, tag='criterion')  // this is the training objective
        wer = ErrorPrediction        (labels, l3p, tag='evaluation') // this also gets tracked
    ]

    SGD = [
        epochSize = 0
        minibatchSize = 200
        maxEpochs = 5
        momentumPerMB = 0.9
        learningRatesPerMB = 0.1
    ]

    reader = [
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Train.txt"

        input = [
            features=[
                alias = "x"
                dim = 1
                format = "dense"
            ]
            labels=[
                alias = "y"
                dim = 5
                format = "dense"
            ]
        ]
    ]
    outputPath = "$OutputDir$/output.txt" # dump the output as text?
]

Write=[
    action="test"
    run=BrainScriptNetworkBuilder

    format = [
        # %n = minibatch, %x = shape, %d = sequenceId
        sequencePrologue=%d\t|w.shape %x\n%d\t|w\s
        sampleSeparator=\n%d\t|w\s
        elementSeparator=\s
    ]

    modelFile = "$ModelDir$/seqcla.dnn"

    reader = [
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Train.txt"

        input = [
            features=[
                alias = "x"
                dim = 1
                format = "dense"
            ]
            labels=[
                alias = "y"
                dim = 5
                format = "dense"
            ]
        ]
    ]
    outputPath = "$OutputDir$/output.txt" # dump the output as text?
]
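A note on how this config is driven, taken from the run-test script further below rather than from the config itself: the test harness runs it twice via cntkrun seqcla.cntk, once from scratch and once with makeMode=true after the produced .dnn models have been deleted, which forces CNTK to resume from the last checkpoint instead of retraining.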
(Six file diffs suppressed because they are too large.)
@@ -0,0 +1,16 @@
#!/bin/bash

. $TEST_ROOT_DIR/run-test-common

ConfigDir=$TEST_DIR/Config

# cntkrun <CNTK config file name> <additional CNTK args>
DeleteModelsAfterTest=0
cntkrun seqcla.cntk || exit $?
echo === Deleting last epoch data
rm $TEST_RUN_DIR/Models/*.dnn
echo ==== Re-running from checkpoint
DeleteExistingModels=0
DeleteModelsAfterTest=1
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun seqcla.cntk 'makeMode=true' || exit $?
@@ -0,0 +1,33 @@
dataDir: ./Data
tags:
  # running on every BVT job in 'S' (Speech) leg in Debug-GPU and Release-CPU configurations:
  - bvt-s (build_sku == 'gpu') and ((flavor=='debug') ^ (device=='cpu'))
  # running unconditionally on every Nightly job in 'S' leg
  - nightly-s (build_sku == 'gpu')

testCases:
  CNTK Run must be completed:
    patterns:
      - __COMPLETED__

  Must train epochs in exactly same order and parameters:
    patterns:
      - Starting Epoch {{integer}}
      - learning rate per sample = {{float}}
      - momentum = {{float}}

  Epochs must be finished with expected results:
    patterns:
      - Finished Epoch[{{integer}} of {{integer}}]
      - TrainLossPerSample = {{float,tolerance=.5%}}
      # TODO GPU has an initial EvalErr rate which is larger than on CPU and otherwise.
      # In later epochs the results are aligned. Why?
      - EvalErrPerSample = {{float,tolerance=13%}}
      - AvgLearningRatePerSample = {{float,tolerance=0.001%}}

# Per-minibatch training results must match:
#   patterns:
#     - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}}
#     - SamplesSeen = {{integer}}
#     - TrainLossPerSample = {{float,tolerance=.5%}}
#     - EvalErr[0]PerSample = {{float,tolerance=.5%}}
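A hedged note on the pattern syntax above, from my reading of the test driver rather than this file: each pattern is matched against both the test output and the recorded baseline, and {{float,tolerance=13%}} accepts a value within 13% of the baseline's. That explains the loose bound on the noisy initial EvalErr next to the tight 0.001% bound on AvgLearningRatePerSample.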
@@ -110,7 +110,6 @@
     <ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptEvaluator.cpp" />
     <ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptParser.cpp" />
     <ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptTest.cpp" />
-    <ClCompile Include="..\..\..\Source\CNTK\BrainScript\ExperimentalNetworkBuilder.cpp" />
     <ClCompile Include="..\..\..\Source\Common\Config.cpp" />
     <ClCompile Include="..\..\..\Source\Common\DataReader.cpp" />
     <ClCompile Include="..\..\..\Source\Common\DataWriter.cpp" />
@@ -34,9 +34,6 @@
     <ClCompile Include="..\..\..\Source\CNTK\BrainScript\BrainScriptTest.cpp">
       <Filter>From BrainScript</Filter>
     </ClCompile>
-    <ClCompile Include="..\..\..\Source\CNTK\BrainScript\ExperimentalNetworkBuilder.cpp">
-      <Filter>From BrainScript</Filter>
-    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <Filter Include="Config">
@@ -189,20 +189,20 @@ struct ReaderFixture

     for (auto cnt = 0; dataReader.GetMinibatch(map) && cnt < m_maxMiniBatchCount; cnt++)
     {
-        MBLayoutPtr pMBlayoutPtr = make_shared<MBLayout>();
-        dataReader.CopyMBLayoutTo(pMBlayoutPtr);
         // Process the Feature Matri(x|ces)
         for (auto i = 0; i < numFeatureFiles; i++)
         {
             wstring name = numFeatureFiles > 1 ? L"features" + std::to_wstring(i + 1) : L"features";
-            OutputMatrix(map.GetInputMatrix<ElemType>(name), *pMBlayoutPtr, outputFile);
+            auto& layoutPtr = map.GetInput(name).pMBLayout;
+            OutputMatrix(map.GetInputMatrix<ElemType>(name), *layoutPtr, outputFile);
         }

         // Process the Label Matri(x|ces)
         for (auto i = 0; i < numLabelFiles; i++)
         {
             wstring name = numLabelFiles > 1 ? L"labels" + std::to_wstring(i + 1) : L"labels";
-            OutputMatrix(map.GetInputMatrix<ElemType>(name), *pMBlayoutPtr, outputFile);
+            auto& layoutPtr = map.GetInput(name).pMBLayout;
+            OutputMatrix(map.GetInputMatrix<ElemType>(name), *layoutPtr, outputFile);
         }
     }
 }
@@ -255,7 +255,10 @@ struct ReaderFixture
     std::vector<shared_ptr<Matrix<ElemType>>> features;
     std::vector<shared_ptr<Matrix<ElemType>>> labels;

-    MBLayoutPtr pMBLayout = make_shared<MBLayout>();
+    // For the time being, use the same layout across all inputs.
+    // TODO: add an option to create per-input layouts (once we have test-cases with different layouts)
+    MBLayoutPtr pMBLayout = make_shared<MBLayout>(1, 0, L"X");

     for (auto i = 0; i < numFeatureFiles; i++)
     {
         features.push_back(make_shared<Matrix<ElemType>>(0));
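A hedged reading of the three-argument constructor used above, inferred from the make_shared<MBLayout>(numNewParallelSequence, nT, L"") call earlier in this commit; the argument roles are my interpretation, not documented here:

    // Inferred: MBLayout(numParallelSequences, numTimeSteps, axisName).
    // (1, 0, L"X") would then build an initially empty layout on an axis named "X",
    // which the reader resizes on the first GetMinibatch() call.
    MBLayoutPtr pMBLayout = make_shared<MBLayout>(/*numParallelSequences*/ 1,
                                                  /*numTimeSteps*/ 0,
                                                  /*axisName*/ L"X");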