This commit is contained in:
Frank Seide 2016-08-24 18:15:09 -07:00
Parent 9e73e6fda5
Commit 769b2602a2
7 changed files with 32 additions and 31 deletions

View file

@@ -46,11 +46,12 @@ DenseLayer{outDim, bias = true, activation=(x=>x), init='heNormal', initValueScale
 # EmbeddingLayer -- create a linear embedding layer
 EmbeddingLayer {outDim,                               # dimension of embedding
+                init='heNormal', initValueScale=1,
                 embeddingPath = '', transpose = false} =   # load a fixed embedding from a path instead
 {
     shape = if transpose then (Inferred : outDim) else (outDim : Inferred)
     E = if embeddingPath == ''
-        then ParameterTensor {shape, init='heNormal'}                                               # learnable
+        then ParameterTensor {shape, init=init, initValueScale=initValueScale}                      # learnable
         else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0}  # fixed from file
     TimesOp = if transpose then TransposeTimes else Times
     apply (x) = TimesOp (E, x)  # x is expected to be sparse one-hot
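A minimal usage sketch of the updated layer (not from the commit; the dimension, file name, and input variable are placeholders):

    embLearned = EmbeddingLayer {300, init='uniform', initValueScale=0.5}        # learnable, explicit init
    embFixed   = EmbeddingLayer {300, embeddingPath='emb.txt', transpose=true}   # frozen, loaded from file
    h = embLearned (features)                                                    # input must be sparse one-hot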

View file

@@ -23,7 +23,7 @@ struct RnnAttributes
     RnnAttributes(bool bidirectional, size_t numLayers, size_t hiddenSize, const wstring& recurrentOp, int axis) :
         m_bidirectional(bidirectional), m_numLayers(numLayers), m_hiddenSize(hiddenSize), m_recurrentOp(recurrentOp), m_axis(axis)
     {
-        if (m_recurrentOp != wstring(L"lstm") && m_recurrentOp != wstring(L"gru") &&
+        if (m_recurrentOp != wstring(L"lstm")    && m_recurrentOp != wstring(L"gru") &&
             m_recurrentOp != wstring(L"rnnReLU") && m_recurrentOp != wstring(L"rnnTanh"))
         {
             InvalidArgument("Unknown cell type '%ls'. Supported values are 'lstm', 'gru', 'rnnReLU', 'rnnTanh'.", m_recurrentOp.c_str());
@@ -45,12 +45,12 @@ struct RnnAttributes
         for (size_t i = 0; i < m_numLayers; i++)
         {
             size_t oneNetTotal =
-                numNetworks * m_hiddenSize // 1, 3, or 4 networks producing hidden-dim output
-                * (inputDim + m_hiddenSize) // each network has these two inputs
-                + numNetworks * m_hiddenSize // biases
-                * 2; // for unknown reasons, cudnn5 uses 2 bias terms everywhere
-            total += oneNetTotal * bidirFactor; // 1 or 2 directions
-            inputDim = bidirFactor * m_hiddenSize; // next layer continues with this as input
+                numNetworks * m_hiddenSize                 // 1, 3, or 4 networks producing hidden-dim output
+                * (inputDim + m_hiddenSize)                // each network has these two inputs
+                + numNetworks * m_hiddenSize               // biases
+                  * 2;                                     // for unknown reasons, cudnn5 uses 2 bias terms everywhere
+            total += oneNetTotal * bidirFactor;            // 1 or 2 directions
+            inputDim = bidirFactor * m_hiddenSize;         // next layer continues with this as input
         }
         return make_pair(m_hiddenSize, total / m_hiddenSize);
     }
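To make the size formula concrete (illustrative numbers, not from the commit): a unidirectional LSTM layer has numNetworks = 4 and bidirFactor = 1, so with inputDim = 256 and m_hiddenSize = 512:

    oneNetTotal = 4 * 512 * (256 + 512)   // weights        = 1,572,864
                + 4 * 512 * 2             // doubled biases =     4,096
                                          // total          = 1,576,960 parameters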
@@ -59,10 +59,10 @@ struct RnnAttributes
     {
         return
             m_bidirectional == other.m_bidirectional &&
-            m_numLayers == other.m_numLayers &&
-            m_hiddenSize == other.m_hiddenSize &&
-            m_recurrentOp == other.m_recurrentOp &&
-            m_axis == other.m_axis;
+            m_numLayers     == other.m_numLayers     &&
+            m_hiddenSize    == other.m_hiddenSize    &&
+            m_recurrentOp   == other.m_recurrentOp   &&
+            m_axis          == other.m_axis;
     }
     void Read(File& stream, bool readAxis)

View file

@@ -21,9 +21,9 @@ TrainTagger = {
     hiddenDim = 300
     model = Sequential (
-        EmbeddingLayer {embDim} :                             # embedding
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :   # LSTM
-        DenseLayer {labelDim, initValueScale=7}               # output layer
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
     # features
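This hunk shows the pattern repeated throughout the commit: layers that create learnable parameters now take init explicitly, with initValueScale scaling the default initialization range. At the parameter level, the DenseLayer line corresponds roughly to the following sketch (an assumption about the Layers.bs internals, not shown in the diff):

    W = ParameterTensor {(labelDim : Inferred), init='uniform', initValueScale=7}   # scaled uniform init
    b = ParameterTensor {labelDim, initValue=0}                                     # bias starts at zero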

View file

@@ -20,11 +20,11 @@ TrainTagger = {
     hiddenDim = 300
     model = Sequential (
-        EmbeddingLayer {embDim} :                                  # embedding
-        BatchNormalizationLayer {normalizationTimeConstant=2048} : ##### added
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :        # LSTM
-        BatchNormalizationLayer {normalizationTimeConstant=2048} : ##### added
-        DenseLayer {labelDim, initValueScale=7}                    # output layer
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
+        BatchNormalizationLayer {normalizationTimeConstant=2048} :          ##### added
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
+        BatchNormalizationLayer {normalizationTimeConstant=2048} :          ##### added
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
     # features
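A note on the lines marked ##### (tutorial context, unchanged by this commit): as I understand the CNTK semantics, normalizationTimeConstant is the time constant, in samples, of the low-pass filter that tracks the running mean and variance used at evaluation time:

    bn = BatchNormalizationLayer {normalizationTimeConstant=2048}   # running stats smoothed over ~2048 samples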

View file

@@ -22,12 +22,12 @@ TrainTagger = {
     LookaheadLayer (x) = Splice (x : FutureValue (0, x, defaultHiddenActivation=0))
     model = Sequential (
-        EmbeddingLayer {embDim} :                                           # embedding
+        EmbeddingLayer {embDim, init='uniform'} :                           # embedding
         LookaheadLayer :                                                    ##### added
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        RecurrentLSTMLayer {hiddenDim, goBackwards=false} :                 # LSTM
+        RecurrentLSTMLayer {hiddenDim, goBackwards=false, init='uniform'} : # LSTM
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}                             # output layer
+        DenseLayer {labelDim, init='uniform', initValueScale=7}             # output layer
     )
     # features
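On the LookaheadLayer defined at the top of this hunk (context, unchanged here): FutureValue (0, x, defaultHiddenActivation=0) delivers x_{t+1} at step t, with zero past the sequence end, so the Splice pairs each token's embedding with its successor's and doubles the embedding dimension:

    LookaheadLayer (x) = Splice (x : FutureValue (0, x, defaultHiddenActivation=0))
    # for x = (x_1, ..., x_T):  output_t = [x_t ; x_{t+1}],  with x_{T+1} = 0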

View file

@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true, init='uniform'}
         apply (x) = Splice (F(x):G(x))
     }.apply
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {labelDim, initValueScale=7}
+        DenseLayer {labelDim, init='uniform', initValueScale=7}
     )
     # features

View file

@@ -20,17 +20,17 @@ TrainTagger = {
     hiddenDim = 150
     BiRecurrentLSTMLayer {outDim} = {
-        F = RecurrentLSTMLayer {outDim, goBackwards=false}
-        G = RecurrentLSTMLayer {outDim, goBackwards=true}
+        F = RecurrentLSTMLayer {outDim, goBackwards=false, init='uniform'}
+        G = RecurrentLSTMLayer {outDim, goBackwards=true, init='uniform'}
         apply (x) = Splice (BS.Sequences.Last(F(x)):BS.Sequences.First(G(x)))
     }.apply
     model = Sequential (
-        EmbeddingLayer {embDim} :
+        EmbeddingLayer {embDim, init='uniform'} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
         BiRecurrentLSTMLayer {hiddenDim} :
         BatchNormalizationLayer {normalizationTimeConstant=2048} :
-        DenseLayer {intentDim, initValueScale=7}
+        DenseLayer {intentDim, init='uniform', initValueScale=7}
     )
     # features
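Comparing the two bidirectional variants (an observation, not part of the commit): the previous file splices the full forward and backward sequences, giving one output per token for slot tagging, while this file splices only the forward LSTM's last output with the backward LSTM's first, giving one vector per utterance for intent classification:

    apply (x) = Splice (F(x) : G(x))                                         # per-token (tagging)
    apply (x) = Splice (BS.Sequences.Last(F(x)) : BS.Sequences.First(G(x)))  # per-sequence (intent)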