updated SLUHandsOn tests
This commit is contained in:
Parent: 9e73e6fda5
Commit: 769b2602a2
@@ -46,11 +46,12 @@ DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueSca
 # EmbeddingLayer -- create a linear embedding layer
 EmbeddingLayer {outDim,                            # dimension of embedding
+                init='heNormal', initValueScale=1,
                 embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
 {
     shape = if transpose then (Inferred : outDim) else (outDim : Inferred)
     E = if embeddingPath == ''
-        then ParameterTensor {shape, init='heNormal'} # learnable
+        then ParameterTensor {shape, init=init, initValueScale=initValueScale} # learnable
         else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
     TimesOp = if transpose then TransposeTimes else Times
     apply (x) = TimesOp (E, x) # x is expected to be sparse one-hot
 
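With this change a caller can override the embedding's initializer, which was previously hard-coded to heNormal. A minimal usage sketch (the dimension 150 and the names embed/w are hypothetical, not from the commit):

    # sketch: custom initializer is passed through to the underlying ParameterTensor
    embed = EmbeddingLayer {150, init='uniform', initValueScale=1}
    e = embed (w)   # w is expected to be a sparse one-hot word vector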
@@ -23,7 +23,7 @@ struct RnnAttributes
     RnnAttributes(bool bidirectional, size_t numLayers, size_t hiddenSize, const wstring& recurrentOp, int axis) :
         m_bidirectional(bidirectional), m_numLayers(numLayers), m_hiddenSize(hiddenSize), m_recurrentOp(recurrentOp), m_axis(axis)
     {
         if (m_recurrentOp != wstring(L"lstm")    && m_recurrentOp != wstring(L"gru") &&
             m_recurrentOp != wstring(L"rnnReLU") && m_recurrentOp != wstring(L"rnnTanh"))
         {
             InvalidArgument("Unknown cell type '%ls'. Supported values are 'lstm', 'gru', 'rnnReLU', 'rnnTanh'.", m_recurrentOp.c_str());
@@ -45,12 +45,12 @@ struct RnnAttributes
         for (size_t i = 0; i < m_numLayers; i++)
         {
             size_t oneNetTotal =
                 numNetworks * m_hiddenSize   // 1, 3, or 4 networks producing hidden-dim output
                 * (inputDim + m_hiddenSize)  // each network has these two inputs
                 + numNetworks * m_hiddenSize // biases
                 * 2;                         // for unknown reasons, cudnn5 uses 2 bias terms everywhere
             total += oneNetTotal * bidirFactor;    // 1 or 2 directions
             inputDim = bidirFactor * m_hiddenSize; // next layer continues with this as input
         }
         return make_pair(m_hiddenSize, total / m_hiddenSize);
     }
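To make the parameter count concrete, a worked example under assumed settings (an lstm cell, hence numNetworks = 4; a single unidirectional layer, so bidirFactor = 1; inputDim = 150 and m_hiddenSize = 300 are hypothetical values, not from the commit):

$$\text{oneNetTotal} = 4 \cdot 300 \cdot (150 + 300) + 4 \cdot 300 \cdot 2 = 540000 + 2400 = 542400$$

so $\text{total} = 542400$ and the function returns $\text{make\_pair}(300,\ 542400 / 300) = (300,\ 1808)$.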
@@ -59,10 +59,10 @@ struct RnnAttributes
     {
         return
             m_bidirectional == other.m_bidirectional &&
             m_numLayers     == other.m_numLayers     &&
             m_hiddenSize    == other.m_hiddenSize    &&
             m_recurrentOp   == other.m_recurrentOp   &&
             m_axis          == other.m_axis;
     }
 
     void Read(File& stream, bool readAxis)
@@ -21,9 +21,9 @@ TrainTagger = {
     hiddenDim = 300
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
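Because of the EmbeddingLayer change in the first hunk, init='uniform' now reaches the underlying ParameterTensor. A sketch of what the updated embedding call expands to (the value 150 for embDim is hypothetical; initValueScale defaults to 1):

    # sketch: with the new definition, these two forms build the same weight
    emb = EmbeddingLayer {150, init='uniform'}
    W   = ParameterTensor {(150 : Inferred), init='uniform', initValueScale=1}
    embExplicit (x) = Times (W, x)   # x sparse one-hot, as in the layer definition above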
@@ -20,11 +20,11 @@ TrainTagger = {
     hiddenDim = 300
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
         BatchNormalizationLayer {normalizationTimeConstant=2048} :         ##### added
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
         BatchNormalizationLayer {normalizationTimeConstant=2048} :         ##### added
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
@@ -22,12 +22,12 @@ TrainTagger = {
     LookaheadLayer (x) = Splice (x : FutureValue (0, x, defaultHiddenActivation=0))
 
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
         LookaheadLayer :                                                   ##### added
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
 
     # features
@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
 
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true,  init='uniform'}
         apply (x) = Splice (F(x):G(x))
     }.apply
 
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}
+        DenseLayer {labelDim, init='uniform', initValueScale=7}
     )
 
     # features
@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
 
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true,  init='uniform'}
         apply (x) = Splice (BS.Sequences.Last(F(x)):BS.Sequences.First(G(x)))
     }.apply
 
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {intentDim, initValueScale=7}
+        DenseLayer {intentDim, init='uniform', initValueScale=7}
     )
 
     # features
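Unlike the slot-tagging variants above, this intent model reduces each input sequence to a single vector before the dense layer. A short shape sketch (hiddenDim = 150 as in this config):

    # F(x), G(x)               : sequences of 150-dim vectors (forward / backward LSTM states)
    # BS.Sequences.Last (F(x)) : final forward state, one 150-dim vector per sequence
    # BS.Sequences.First (G(x)): first backward state, i.e. the backward pass over the whole input
    # Splice (...)             : one 300-dim vector per sequence, fed to DenseLayer {intentDim, ...}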