Validation now accepts mismatching MBLayouts, relying on runtime checks only. Updated the ATIS sample.
Parent: b950b9ede4
Commit: f88fcdb3f2
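The gist of the change: when two inputs of a node carry different MBLayout (dynamic-axis) objects, validation no longer hard-fails with a RuntimeError; it prints a warning and defers to the runtime checks, which compare layout contents when data is actually sliced. Below is a minimal sketch of that policy in isolation — Layout, Node, validateLayouts, and sliceAt are stand-ins invented for this example, not CNTK's classes:

// Minimal sketch of "warn at validation, fail at runtime" for shared layout objects.
// Layout, Node, validateLayouts, and sliceAt are stand-ins for this example only;
// they are not the CNTK classes touched by this commit.
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <vector>

struct Layout
{
    std::vector<size_t> seqLengths; // one entry per sequence in the minibatch
    bool operator==(const Layout& other) const { return seqLengths == other.seqLengths; }
};
using LayoutPtr = std::shared_ptr<Layout>;

struct Node
{
    const char* name;
    LayoutPtr layout; // null for parameters/constants (no dynamic axis)
};

// Validation-time check: pointer identity only. Mismatching objects are tolerated
// with a warning, because content-equal layouts will still work at runtime.
void validateLayouts(const Node& a, const Node& b)
{
    if (!a.layout || !b.layout || a.layout == b.layout)
        return; // consistent, or no dynamic axis involved
    fprintf(stderr, "WARNING: %s vs. %s: dynamic axes mismatch; if incompatible, this fails later.\n",
            a.name, b.name);
}

// Runtime check: content equality is what actually matters when slicing data.
void sliceAt(const Node& n, const Layout& frameLayout)
{
    if (n.layout && !(*n.layout == frameLayout))
        throw std::runtime_error("dynamic axis is inconsistent with matrix");
    // ... locate the requested columns here ...
}

int main()
{
    auto l1 = std::make_shared<Layout>(Layout{{5, 3}});
    auto l2 = std::make_shared<Layout>(Layout{{5, 3}}); // distinct object, equal content
    Node x{"x", l1}, y{"y", l2};
    validateLayouts(x, y); // warns, but does not throw
    sliceAt(y, *l1);       // succeeds: contents match
    return 0;
}

The validation-time test is deliberately weak (pointer identity), because two distinct layout objects can still describe the same sequence packing; only the content comparison at slice time can tell genuinely incompatible axes apart.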
@@ -11,14 +11,14 @@ parallelTrain = true
 #stderr = $WorkDir$/log
 
-command = TrainSlotTagger:RunSlotTagger:EvalSlotTagger
+command = TrainATIS:RunATIS:EvalATIS
 
 vocabSize = 943 # number of words
 numLabels = 129 # number of slot labels
 numIntents = 26 # number of intent labels
 
 # The command to train the LSTM model
-TrainSlotTagger = [
+TrainATIS = [
     action = "train"
     BrainScriptNetworkBuilder = [
         inputDim = $vocabSize$
 
@@ -53,7 +53,7 @@ TrainSlotTagger = [
         evaluationNodes = (errs)
         outputNodes = (z)
     ]
-    # enable this one instead for intent classification
+    # rename this to BrainScriptNetworkBuilder to switch to intent-classification task
     Intent_BrainScriptNetworkBuilder = [
         inputDim = $vocabSize$
         labelDim = $numIntents$
 
@@ -61,18 +61,12 @@ TrainSlotTagger = [
         #hiddenDim = 300
         hiddenDim = 150
 
-        RecSplice (a2) = [ # splice with reconciliation
-            i1 = a2[0]
-            i2 = ReconcileDynamicAxis (a2[1], i1)
-            res = Splice (i1 : i2)
-        ].res
-
         model = Sequential (
             Parallel ((DelayLayer{T=1} : Identity : DelayLayer{T=-1}), Splice) : # 3-word window
             EmbeddingLayer {embDim} :                            # embedding
             RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last : # LSTM state, final state
-            #Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
-            #          Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true}: BS.Sequences.First)), RecSplice) : # bidirectional LSTM
+            #Parallel ((Sequential (RecurrentLSTMLayer {hiddenDim} : BS.Sequences.Last):
+            #          Sequential (RecurrentLSTMLayer {hiddenDim, goBackwards=true} : BS.Sequences.First)), Splice) : # bidirectional LSTM
             DenseLayer {labelDim, initValueScale=7}              # output layer
         )
 
@@ -82,7 +76,7 @@ TrainSlotTagger = [
         intentLabels = Input {labelDim}
 
         # model application
-        z = ReconcileDynamicAxis (model (query), intentLabels)
+        z = model (query)
 
         # loss and metric
         ce = CrossEntropyWithSoftmax (intentLabels, z)
 
@@ -96,30 +90,18 @@ TrainSlotTagger = [
     ]
 
     SGD = [
-        # maximum number of epochs
-        maxEpochs = 20 # set to 1 so this can be added to regression test. Increase to 20 get a good accuracy
-        #maxEpochs = 200 # set to 1 so this can be added to regression test. Increase to 20 get a good accuracy
-
-        # for each epoch, maximum number of input samples(words) is set below
-        epochSize = 36000
+        maxEpochs = 20 ; epochSize = 36000
 
         # minibatchSize should be larger than the maximum sentence length
         minibatchSize = 70
 
         learningRatesPerSample = 0.01*2:0.005*12:0.001
         #learningRatesPerSample = 0.01*20:0.005*120:0.001
 
         gradUpdateType = "FSAdaGrad"
 
-        gradientClippingWithTruncation = true
-        clippingThresholdPerSample = 15.0
+        gradientClippingWithTruncation = true ; clippingThresholdPerSample = 15.0
 
-        # number of minibatches to report progress
-        firstMBsToShowResult = 10
-        numMBsToShowResult = 100
-
-        # if validation shows that the model has no improvement, then do back-up to the previously
-        # estimated model and reduce learning rate
-        loadBestModel = true
+        firstMBsToShowResult = 10 ; numMBsToShowResult = 100
 
         parallelTrain = [
             parallelizationMethod = "DataParallelSGD"
 
@@ -144,21 +126,19 @@ TrainSlotTagger = [
 ]
 
 # Run the model to predict slot labels
-RunSlotTagger = [
+RunATIS = [
     action = "write"
     BrainScriptNetworkBuilder = [
         modelAsTrained = BS.Network.Load ("$modelPath$")
         final = Hardmax (modelAsTrained.z) # make a decision
-        labels = Pass (modelAsTrained.slotLabels)
-        # enable this for intent classification:
-        #labels = Pass (modelAsTrained.intentLabels)
-        #t = DynamicAxis()
+        #labels = Pass (modelAsTrained.slotLabels)
+        labels = Pass (modelAsTrained.intentLabels)
+        t = DynamicAxis()
     ]
 
     outputPath = $WorkDir$/model.writeaction
-    outputNodeNames = slotLabels:final
-    # enable this for intent classification:
-    #outputNodeNames = intentLabels:final
+    outputNodeNames = intentLabels:slotLabels:final
 
     reader = [
         readerType = "CNTKTextFormatReader"
 
@@ -173,7 +153,7 @@ RunSlotTagger = [
 ]
 
 # Evaluate the model's slot-tagging accuracy (as an error count)
-EvalSlotTagger = [
+EvalATIS = [
     action = "eval"
     modelPath = $modelPath$ # from outside
     reader = [
@@ -239,7 +239,7 @@ Exp = CNTK2.Exp
 Floor = CNTK2.Floor
 Log = CNTK2.Log
 Minus = CNTK2.Minus
-Pass = CNTK2.Identity
+Pass = CNTK2.Pass
 Plus = CNTK2.Plus
 RectifiedLinear = CNTK2.ReLU # deprecated
 ReLU = CNTK2.ReLU
 
@@ -380,10 +380,11 @@ CNTK2 = [
     LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = (_ : y) /*plus the function args*/ ]
 
     // 13. Others
-    Identity(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _ /*plus the function args*/ ]
+    Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _ /*plus the function args*/ ]
+    Identity = Pass
 ]
 
-# Parameter{} can do several forms of initialization. It is no longer required to say 'init="kind"', so we can clean these up a bit.
+# Parameter{} can do several forms of initialization.
 # - initValue=scalar, value=array --> initialize from this value --array form not implemented yet
 # - initFromFilePath="..." --> read from a data file
 # - init="uniform|gaussian" (random init scaled by initValueScale). Warning: This has magic scaling factors. TODO: document them here
 
@@ -393,7 +394,9 @@ CNTK2 = [
 # - init="fixedValue", value from 'value'
 # Warning: Current config will behave unexpected if user mistypes 'initValue' as 'value' (which will be ignored, defaulting to "uniform" init)
 Parameter {outputDim, inputDim, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0/*deprecated*/, initValue = '', initFromFilePath = '', initFromLiteral = ''/*deprecated*/, initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ dims = (outputDim : inputDim) ] /*plus the function args*/ ]
-LearnableParameter = Parameter // deprecated
+
+LearnableParameter = Parameter // deprecated
+
 # TODO: make Parameter take tensor dims?
 ParameterTensor {dims, learningRateMultiplier = 1.0, init = ''/*|uniform|fixedValue|gaussian|fromFile|fromLiteral*/, initValueScale = 1, value = 0, initValue = '', initFromFilePath = '', initFromLiteral = '', initOnCPUOnly=true, randomSeed=-1, tag=''} = new ComputationNode [ operation = 'LearnableParameter' ; shape = new TensorShape [ /*dims*/ ] /*plus the function args*/ ]
 ConstantFromString(literal, tag='') = ParameterTensor((0)/*dim, will be inferred*/, initFromLiteral = literal, learningRateMultiplier = 0.0)
@@ -962,7 +962,7 @@ static inline std::pair<size_t, size_t> ColumnRangeWithMBLayoutFor(size_t numCols,
     if (fr.m_broadcastAllowed && !pMBLayout && numCols == 1)
         return std::pair<size_t, size_t>(0, numCols);
     if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
-        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
+        ;// LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation?");
     else
         LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix.");
 }
 
@@ -1055,8 +1055,8 @@ static inline std::pair<DimensionVector, DimensionVector> TensorSliceWithMBLayoutFor(
     if (fr.m_pMBLayout /*get data for a loop*/ && !pMBLayout /*'data' is not samples*/ && fr.m_broadcastAllowed /*we're OK with that*/)
         ; // the time dimension is broadcasting--leave it as is
     else if (fr.m_pMBLayout && pMBLayout && *fr.m_pMBLayout == *pMBLayout)
-        LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
-                   static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
+        ; //LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix. They are compatible though--are you missing a ReconcileDynamicAxis operation? %s vs. %s",
+          //           static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
     else
         LogicError("DataFor: FrameRange's dynamic axis is inconsistent with matrix: %s vs. %s",
                    static_cast<string>(*(fr.m_pMBLayout)).c_str(), static_cast<string>(*(pMBLayout)).c_str());
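With the two LogicErrors above commented out, the slicing helpers in this file now tolerate the "compatible though" case: a FrameRange and a matrix whose layout objects differ by identity but agree in content. A self-contained restatement of the resulting decision — MBLayoutish and checkSliceLayouts are hypothetical names for this sketch, which models only the branch reached once validation has let a pointer mismatch through:

// Decision now made when slicing data whose layout pointer differs from the
// FrameRange's. MBLayoutish and checkSliceLayouts are stand-ins for this
// sketch only, not the types in Sequences.h.
#include <stdexcept>
#include <vector>

struct MBLayoutish
{
    std::vector<size_t> seqLengths; // packing description: one length per sequence
    bool operator==(const MBLayoutish& o) const { return seqLengths == o.seqLengths; }
};

enum class SliceCheck { Broadcast, CompatibleButDistinct };

SliceCheck checkSliceLayouts(const MBLayoutish* frameLayout, const MBLayoutish* dataLayout,
                             bool broadcastAllowed)
{
    if (broadcastAllowed && !dataLayout)
        return SliceCheck::Broadcast; // time dimension broadcasts; leave the data as is
    if (frameLayout && dataLayout && *frameLayout == *dataLayout)
        return SliceCheck::CompatibleButDistinct; // formerly a LogicError, now accepted
    // contents genuinely differ (or a layout is missing where one is required)
    throw std::logic_error("FrameRange's dynamic axis is inconsistent with matrix");
}

int main()
{
    MBLayoutish a{{5, 3}}, b{{5, 3}}; // distinct objects, equal packing
    return checkSliceLayouts(&a, &b, /*broadcastAllowed=*/false)
               == SliceCheck::CompatibleButDistinct ? 0 : 1;
}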
@@ -87,12 +87,21 @@ void ComputationNode<ElemType>::Backprop(const FrameRange& fr, bool childrenInThisLoop
 // subroutines for Validate() implementations
 // -----------------------------------------------------------------------
 
-static void InconsistentMBLayout(const ComputationNodeBase& us, const ComputationNodeBase& which, ComputationNodeBase& vsWhich)
+// compare two MBLayouts, and alert if they are different
+void ComputationNodeBase::ValidateMBLayout(const ComputationNodeBasePtr which, const ComputationNodeBasePtr vsWhich) const
 {
-#if 1
-    RuntimeError("%ls: Dynamic axes mismatches between %ls and %ls. If this is by design, use ReconcileDynamicAxis().",
-                 us.NodeDescription().c_str(), which.NodeDescription().c_str(), vsWhich.NodeDescription());
+    if (!which->HasMBLayout() || !vsWhich->HasMBLayout() || which->GetMBLayout() == vsWhich->GetMBLayout())
+        return;
+    // MBLayouts are inconsistent
+#if 0
+    // can't have that
+    RuntimeError("%ls: Dynamic axes mismatch between %ls and %ls. If this is by design, use ReconcileDynamicAxis().",
+                 NodeDescription().c_str(), which->NodeDescription().c_str(), vsWhich->NodeDescription());
 #else
+    // We will let this slip with a reminder, assuming that this will be caught at runtime.
+    // By allowing this, users will not need ReconcileDynamicAxis() for reductions over a sequence like BS.Sequences.Last().
+    fprintf(stderr, "WARNING: %ls: Dynamic axes mismatch between %ls and %ls. If they are incompatible, this will fail later. If this is by design, use ReconcileDynamicAxis().\n",
+            NodeDescription().c_str(), which->NodeDescription().c_str(), vsWhich->NodeDescription().c_str());
 #endif
 }
 
@@ -104,20 +113,20 @@ static void InconsistentMBLayout(const ComputationNodeBase& us, const ComputationNodeBase& which, ComputationNodeBase& vsWhich)
 // - if there are more than one different layouts involved, this function will fail
 void ComputationNodeBase::InferMBLayoutFromInputsForStandardCase(bool isFinalValidationPass)
 {
-    MBLayoutPtr pMBLayout; // start with NULL layout
-    for (auto child : m_inputs)
+    ComputationNodeBasePtr firstInputWithMBLayout;
+    for (auto input : m_inputs)
     {
-        if (!child) // node not set yet (DelayedValueNodeBase seems to allow this)--BUGBUG: Then this function won't operate correctly.
+        if (!input) // node not set yet (DelayedValueNodeBase seems to allow this)--BUGBUG: Then this function won't operate correctly.
             ;
-        else if (!child->m_pMBLayout) // NULL layout (typical for parameter nodes)
+        else if (!input->m_pMBLayout) // NULL layout (typical for parameter nodes)
             ;
-        else if (!pMBLayout) // first non-NULL layout: just copy it
-            pMBLayout = child->m_pMBLayout;
-        else if (pMBLayout != child->m_pMBLayout && isFinalValidationPass) // got a layout--compare whether it is the same
-            InconsistentMBLayout(*this, *this, *child);
+        else if (!firstInputWithMBLayout) // first input with layout: remember this child
+            firstInputWithMBLayout = input;
+        else if (isFinalValidationPass) // got a layout--compare whether it is the same
+            ValidateMBLayout(firstInputWithMBLayout, input);
     }
     // all are consistent: install it
-    LinkToMBLayout(pMBLayout);
+    LinkToMBLayout(firstInputWithMBLayout ? firstInputWithMBLayout->m_pMBLayout : nullptr);
 }
 
 // single input that maps its input element-wise (e.g. Sigmoid)
 
@@ -140,12 +149,8 @@ void ComputationNodeBase::ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast)
 
     ValidateInferBinaryInputDims();
 
-    if (isFinalValidationPass &&
-        Input(0)->HasMBLayout() && Input(1)->HasMBLayout() &&
-        Input(0)->GetMBLayout() != Input(1)->GetMBLayout())
-    {
-        InconsistentMBLayout(*this, *Input(0), *Input(1));
-    }
+    if (isFinalValidationPass)
+        ValidateMBLayout(Input(0), Input(1));
 
     // result has tensor shape with dimensions being the max over both
     let shape0 = GetInputSampleLayout(0);
 
@@ -187,8 +192,7 @@ void ComputationNodeBase::ValidateNaryZip(bool isFinalValidationPass, bool allowBroadcast, size_t numInputs)
     if (isFinalValidationPass)
         for (size_t i = 0; i < numInputs; i++)
             for (size_t j = i + 1; j < numInputs; j++)
-                if (Input(i)->HasMBLayout() && Input(j)->HasMBLayout() && Input(i)->GetMBLayout() != Input(j)->GetMBLayout())
-                    InconsistentMBLayout(*this, *Input(i), *Input(j));
+                ValidateMBLayout(Input(i), Input(j));
 
     // result has tensor shape with dimensions being the max over all inputs
     let shape0 = GetInputSampleLayout(0);
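The refactored InferMBLayoutFromInputsForStandardCase above remembers the first input that carries a layout, compares every later layout-bearing input against it through the now-lenient ValidateMBLayout, and links that reference layout (or none) to the node. The same selection pattern as a self-contained sketch — DynAxis, NodeRef, and pickReferenceLayout are illustrative names, not CNTK's:

// Selection pattern used by the refactored layout inference: remember the first
// input that carries a layout, compare the rest against it, then adopt it.
// DynAxis, NodeRef, and pickReferenceLayout are illustrative names for this sketch only.
#include <cstdio>
#include <memory>
#include <vector>

struct DynAxis { int id; };
using DynAxisPtr = std::shared_ptr<DynAxis>;

struct NodeRef
{
    const char* name;
    DynAxisPtr axis; // null for inputs without a dynamic axis (e.g. parameters)
};

DynAxisPtr pickReferenceLayout(const std::vector<NodeRef>& inputs, bool finalPass)
{
    const NodeRef* first = nullptr; // first input that has a layout
    for (const auto& in : inputs)
    {
        if (!in.axis)
            continue; // no dynamic axis: nothing to compare
        if (!first)
            first = &in; // remember the reference input
        else if (finalPass && first->axis != in.axis) // identity check, as in validation
            fprintf(stderr, "WARNING: %s vs. %s: dynamic axes mismatch\n", first->name, in.name);
    }
    return first ? first->axis : nullptr; // what the node would LinkToMBLayout()
}

int main()
{
    auto a = std::make_shared<DynAxis>(DynAxis{1});
    auto b = std::make_shared<DynAxis>(DynAxis{1}); // distinct object
    std::vector<NodeRef> inputs = {{"W", nullptr}, {"x", a}, {"h", b}};
    DynAxisPtr chosen = pickReferenceLayout(inputs, /*finalPass=*/true); // warns, returns a
    return chosen == a ? 0 : 1;
}

Keeping the whole reference input, rather than just its layout pointer, is what lets the eventual warning name both offending inputs.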
@@ -679,6 +679,7 @@ protected:
     void ValidateBinaryZip(bool isFinalValidationPass, bool allowBroadcast);
     void ValidateBinaryReduce(bool isFinalValidationPass);
     void ValidateNaryZip(bool isFinalValidationPass, bool allowBroadcast, size_t numInputs);
+    void ValidateMBLayout(const ComputationNodeBasePtr which, const ComputationNodeBasePtr vsWhich) const;
     void InferMBLayoutFromInputsForStandardCase(bool isFinalValidationPass);
     virtual void ValidateInferInputDimsFrom(const TensorShape&) = 0; // (implemented by ComputationNode<ElemType>)