now accepts mismatching MBLayouts during validation, relying on runtime checks only. Updated ATIS sample.

Frank Seide 2016-08-11 17:29:21 -07:00
Parent b950b9ede4
Commit f88fcdb3f2
5 changed files with 52 additions and 64 deletions
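In essence: the dynamic-axis (MBLayout) consistency check that validation performs across a node's inputs is demoted from a hard error to a warning, and the runtime slicing checks in Sequences.h remain the authoritative test. A condensed sketch of the new policy, using simplified stand-in types and a hypothetical helper name rather than the actual CNTK classes:

#include <cstdio>
#include <memory>

struct MBLayout { };                       // stand-in; the real class describes minibatch packing
using MBLayoutPtr = std::shared_ptr<MBLayout>;

// Before this commit: a layout mismatch between two inputs raised an error during
// validation. After: validation only warns; genuinely incompatible layouts still
// fail later, when the runtime tries to slice data with them.
void CheckDynamicAxes(const MBLayoutPtr& a, const MBLayoutPtr& b)
{
    if (!a || !b || a == b)
        return;                            // no dynamic axis on one side, or a shared axis
    fprintf(stderr, "WARNING: dynamic axes differ; if incompatible, this will fail at runtime\n");
}

This is what lets the updated ATIS sample write z = model (query) directly, without wrapping the reduced-axis model output in ReconcileDynamicAxis().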

View file

@@ -11,14 +11,14 @@ parallelTrain = true
#stderr = $WorkDir$/log
command = TrainSlotTagger:RunSlotTagger:EvalSlotTagger
command = TrainATIS:RunATIS:EvalATIS
vocabSize = 943 # number of words
numLabels = 129 # number of slot labels
numIntents = 26 # number of intent labels
# The command to train the LSTM model
TrainSlotTagger = [
TrainATIS = [
action = "train"
BrainScriptNetworkBuilder = [
inputDim = $vocabSize$
@@ -53,7 +53,7 @@ TrainSlotTagger = [
evaluationNodes = (errs)
outputNodes = (z)
]
# enable this one instead for intent classification
# rename this to BrainScriptNetworkBuilder to switch to intent-classification task
Intent_BrainScriptNetworkBuilder = [
inputDim = $vocabSize$
labelDim = $numIntents$
@@ -61,18 +61,12 @@ TrainSlotTagger = [
#hiddenDim = 300
hiddenDim = 150
RecSplice (a2) = [ # splice with reconciliation
i1 = a2[0]
i2 = ReconcileDynamicAxis (a2[1], i1)
res = Splice (i1 : i2)
].res
model = Sequential (
Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
EmbeddingLayer {embDim} : # embedding
RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last : # LSTM state, final state
#Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
# Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true}: BS.Sequences.First)), RecSplice) : # bidirectional LSTM
#Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
# Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true} : BS.Sequences.First)), Splice) : # bidirectional LSTM
DenseLayer {labelDim, initValueScale=7} # output layer
)
@@ -82,7 +76,7 @@ TrainSlotTagger = [
intentLabels = Input {labelDim}
# model application
z = ReconcileDynamicAxis (model (query), intentLabels)
z = model (query)
# loss and metric
ce = CrossEntropyWithSoftmax (intentLabels, z)
@@ -96,30 +90,18 @@ TrainSlotTagger = [
]
SGD = [
# maximum number of epochs
maxEpochs = 20 # set to 1 so this can be added to regression test. Increase to 20 to get a good accuracy
#maxEpochs = 200 # set to 1 so this can be added to regression test. Increase to 20 to get a good accuracy
maxEpochs = 20 ; epochSize = 36000
# for each epoch, the maximum number of input samples (words) is set below
epochSize = 36000
# minibatchSize should be larger than the maximum sentence length
minibatchSize = 70
learningRatesPerSample = 0.01*2:0.005*12:0.001
#learningRatesPerSample = 0.01*20:0.005*120:0.001
gradUpdateType = "FSAdaGrad"
gradientClippingWithTruncation = true
clippingThresholdPerSample = 15.0
gradientClippingWithTruncation = true ; clippingThresholdPerSample = 15.0
# number of minibatches to report progress
firstMBsToShowResult = 10
numMBsToShowResult = 100
# if validation shows that the model has no improvement, then roll back to the previously
# estimated model and reduce the learning rate
loadBestModel = true
firstMBsToShowResult = 10 ; numMBsToShowResult = 100
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
@@ -144,21 +126,19 @@ TrainSlotTagger = [
]
# Run the model to predict slot labels
RunSlotTagger = [
RunATIS = [
action = "write"
BrainScriptNetworkBuilder = [
modelAsTrained = BS.Network.Load ("$modelPath$")
final = Hardmax (modelAsTrained.z) # make a decision
labels = Pass (modelAsTrained.slotLabels)
#labels = Pass (modelAsTrained.slotLabels)
# enable this for intent classification:
#labels = Pass (modelAsTrained.intentLabels)
#t = DynamicAxis()
labels = Pass (modelAsTrained.intentLabels)
t = DynamicAxis()
]
outputPath = $WorkDir$/model.writeaction
outputNodeNames = slotLabels:final
# enable this for intent classification:
#outputNodeNames = intentLabels:final
outputNodeNames = intentLabels:slotLabels:final
reader = [
readerType = "CNTKTextFormatReader"
@@ -173,7 +153,7 @@ RunSlotTagger = [
]
# Evaluate the model's slot-tagging accuracy (as an error count)
EvalSlotTagger = [
EvalATIS = [
action = "eval"
modelPath = $modelPath$ # from outside
reader = [

View file

@@ -239,7 +239,7 @@ Exp = CNTK2.Exp
Floor = CNTK2.Floor
Log = CNTK2.Log
Minus = CNTK2.Minus
Pass = CNTK2.Identity
Pass = CNTK2.Pass
Plus = CNTK2.Plus
RectifiedLinear = CNTK2.ReLU # deprecated
ReLU = CNTK2.ReLU
@@ -380,10 +380,11 @@ CNTK2 = [
LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = (_ : y) /*plus the function args*/ ]
// 13. Others
Identity(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _ /*plus the function args*/ ]
Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _ /*plus the function args*/ ]
Identity = Pass
]
# Parameter{} can do several forms of initialization. It is no longer required to say 'init="kind"', so we can clean these up a bit.
# Parameter{} can do several forms of initialization.
# - initValue=scalar, value=array --> initialize from this value --array form not implemented yet
# - initFromFilePath="..." --> read from a data file
# - init="uniform|gaussian" (random init scaled by initValueScale). Warning: This has magic scaling factors. TODO: document them here
@@ -393,7 +394,9 @@ CNTK2 = [
# - init="fixedValue", value from 'value'
# Warning: Current config will behave unexpectedly if the user mistypes 'initValue' as 'value' (which will be ignored, defaulting to "uniform" init)
Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
LearnableParameter = Parameter // deprecated
# TODO: make Parameter take tensor dims?
ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, initFromLiteral = literal, learningRateMultiplier = 0.0)

View file

@@ -962,7 +962,7 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCol
if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
return std::pair<size_t, size_t>(0, numCols);
if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
;// LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
else
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix.");
}
@@ -1055,8 +1055,8 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayou
if (fr.m_pMBLayout /*get data for a loop*/ && !pMBLayout /*'data' is not samples*/ && fr.m_broadcastAllowed /*we're OK with that*/)
; // the time dimension is broadcasting--leave it as is
else if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
; //LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
// static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
else
LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix: %s vs. %s",
static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
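For orientation, the two checks above triage three cases: no layout to compare, layouts that are distinct objects but equal in content (the error now commented out), and layouts with genuinely different content (still fatal). A minimal sketch of that triage, assuming a simplified Layout stand-in for CNTK's MBLayout and a hypothetical helper name:

#include <memory>
#include <stdexcept>

// Simplified stand-in: a layout describes how sequences are packed into a minibatch.
struct Layout
{
    size_t numSequences, numTimeSteps;
    bool operator==(const Layout& other) const
    {
        return numSequences == other.numSequences && numTimeSteps == other.numTimeSteps;
    }
};

void CheckSliceLayouts(const std::shared_ptr<Layout>& frLayout, const std::shared_ptr<Layout>& dataLayout)
{
    if (frLayout == dataLayout)
        return;                // same object (or both absent): trivially consistent
    if (frLayout && dataLayout && *frLayout == *dataLayout)
        ;                      // equal content, different objects: now let it slip
    else
        throw std::logic_error("FrameRange's dynamic axis is inconsistent with matrix");
}

Since validation now merely warns on a mismatch, this runtime path is where a genuinely incompatible pair of layouts finally surfaces as an error.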

View file

@@ -87,12 +87,21 @@ void ComputationNode<ElemType>::Backprop(const FrameRange& fr, bool childrenInTh
// subroutines for Validate() implementations
// -----------------------------------------------------------------------
static void InconsistentMBLayout(const ComputationNodeBase& us, const ComputationNodeBase& which, ComputationNodeBase& vsWhich)
// compare two MBLayouts, and alert if they are different
void ComputationNodeBase::ValidateMBLayout(const ComputationNodeBasePtr which, const ComputationNodeBasePtr vsWhich) const
{
#if 1
RuntimeError("%ls: Dynamic axes mismatches between %ls and %ls. If this is by design, use ReconcileDynamicAxis().",
us.NodeDescription().c_str(), which.NodeDescription().c_str(), vsWhich.NodeDescription().c_str());
if (!which->HasMBLayout() || !vsWhich->HasMBLayout() || which->GetMBLayout() == vsWhich->GetMBLayout())
return;
// MBLayouts are inconsistent
#if 0
// can't have that
RuntimeError("%ls: Dynamic axes mismatch between %ls and %ls. If this is by design, use ReconcileDynamicAxis().",
NodeDescription().c_str(), which->NodeDescription().c_str(), vsWhich->NodeDescription().c_str());
#else
// We will let this slip with a reminder, assuming that this will be caught at runtime.
// By allowing this, users will not need ReconcileDynamicAxis() for reductions over a sequence like BS.Sequences.Last().
fprintf(stderr, "WARNING: %ls: Dynamic axes mismatch between %ls and %ls. If they are incompatible, this will fail later. If this is by design, use ReconcileDynamicAxis().\n",
NodeDescription().c_str(), which->NodeDescription().c_str(), vsWhich->NodeDescription().c_str());
#endif
}
@@ -104,20 +113,20 @@ static void InconsistentMBLayout(const ComputationNodeBase& us, const Computatio
// - if more than one distinct layout is involved, this function will fail
void ComputationNodeBase::InferMBLayoutFromInputsForStandardCase(bool isFinalValidationPass)
{
MBLayoutPtr pMBLayout; // start with NULL layout
for (auto child : m_inputs)
ComputationNodeBasePtr firstInputWithMBLayout;
for (auto input : m_inputs)
{
if (!child) // node not set yet (DelayedValueNodeBase seems to allow this)--BUGBUG: Then this function won't operate correctly.
if (!input) // node not set yet (DelayedValueNodeBase seems to allow this)--BUGBUG: Then this function won't operate correctly.
;
else if (!child->m_pMBLayout) // NULL layout (typical for parameter nodes)
else if (!input->m_pMBLayout) // NULL layout (typical for parameter nodes)
;
else if (!pMBLayout) // first non-NULL layout: just copy it
pMBLayout = child->m_pMBLayout;
else if (pMBLayout != child->m_pMBLayout && isFinalValidationPass) // got a layout--compare whether it is the same
InconsistentMBLayout(*this, *this, *child);
else if (!firstInputWithMBLayout) // first input with layout: remember this child
firstInputWithMBLayout = input;
else if (isFinalValidationPass) // got a layout--compare whether it is the same
ValidateMBLayout(firstInputWithMBLayout, input);
}
// all are consistent: install it
LinkToMBLayout(pMBLayout);
LinkToMBLayout(firstInputWithMBLayout ? firstInputWithMBLayout->m_pMBLayout : nullptr);
}
// single input that maps its input element-wise (e.g. Sigmoid)
@@ -140,12 +149,8 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool all
ValidateInferBinaryInputDims();
if (isFinalValidationPass &&
Input(0)->HasMBLayout() && Input(1)->HasMBLayout() &&
Input(0)->GetMBLayout() != Input(1)->GetMBLayout())
{
InconsistentMBLayout(*this, *Input(0), *Input(1));
}
if (isFinalValidationPass)
ValidateMBLayout(Input(0), Input(1));
// result has tensor shape with dimensions being the max over both
let shape0 = GetInputSampleLayout(0);
@@ -187,8 +192,7 @@ void ComputationNodeBase::ValidateNaryZip(bool isFinalValidationPass, bool allow
if (isFinalValidationPass)
for (size_t i = 0; i < numInputs; i++)
for (size_t j = i + 1; j < numInputs; j++)
if (Input(i)->HasMBLayout() && Input(j)->HasMBLayout() && Input(i)->GetMBLayout() != Input(j)->GetMBLayout())
InconsistentMBLayout(*this, *Input(i), *Input(j));
ValidateMBLayout(Input(i), Input(j));
// result has tensor shape with dimensions being the max over all inputs
let shape0 = GetInputSampleLayout(0);
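The refactor also changes what InferMBLayoutFromInputsForStandardCase carries through its loop: it now remembers the first input node that has a layout, rather than just the layout pointer, so the warning can name both offending nodes. A rough sketch of that loop with simplified types (not the actual CNTK API):

#include <memory>
#include <vector>

struct Layout { };
struct Node
{
    std::shared_ptr<Layout> layout;        // null for parameters and other static data
    bool HasLayout() const { return layout != nullptr; }
};

// Adopt the first non-null layout; on later inputs whose layout object differs,
// warn via the supplied callback instead of failing (mirroring the change above).
std::shared_ptr<Layout> InferLayout(const std::vector<Node*>& inputs, bool isFinalPass,
                                    void (*warnPair)(const Node* first, const Node* other))
{
    Node* first = nullptr;
    for (Node* input : inputs)
    {
        if (!input || !input->HasLayout())
            continue;                      // unset input or no dynamic axis: skip
        if (!first)
            first = input;                 // remember the node, not just the layout
        else if (isFinalPass && first->layout != input->layout)
            warnPair(first, input);        // mismatch: warn, do not throw
    }
    return first ? first->layout : nullptr; // install this layout (or none) on the node
}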

View file

@@ -679,6 +679,7 @@ protected:
void ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast);
void ValidateBinaryReduce(bool isFinalValidationPass);
void ValidateNaryZip(bool isFinalValidationPass, bool allowBroadcast, size_t numInputs);
void ValidateMBLayout(const ComputationNodeBasePtr which, const ComputationNodeBasePtr vsWhich) const;
void InferMBLayoutFromInputsForStandardCase(bool isFinalValidationPass);
virtual void ValidateInferInputDimsFrom(const TensorShape&) = 0; // (implemented by ComputationNode<ElemType>)