converted seqcla to sparse input

Frank Seide 2016-08-07 18:34:14 -07:00
Parent 8bfd693b9b
Commit 5cd5ec8842
5 changed files with 5465 additions and 26 deletions

View File

@@ -934,7 +934,7 @@ EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA}"
ProjectSection(SolutionItems) = preProject
Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt
Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{181664AC-4C95-4798-A923-09B879215B33}"

View File

@@ -42,13 +42,14 @@ LinearLayer {outDim} =
DenseLayer{outDim, activation=(x=>x)} = Sequential ( LinearLayer{outDim} : activation )
# EmbeddingLayer -- create a linear embedding layer
EmbeddingLayer{outDim, # dimension of embedding
embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
EmbeddingLayer {outDim, # dimension of embedding
embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
{
shape = if transpose then (0 : outDim) else (outDim : 0)
E = if embeddingPath == ''
then ParameterTensor {(outDim : 0), init='uniform'} # learnable
else ParameterTensor {(outDim : 0), initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
TimesOp = if embeddingPath != '' && transpose then TransposeTimes else Times
then ParameterTensor {shape, init='uniform'} # learnable
else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
TimesOp = if transpose then TransposeTimes else Times
f(x) = TimesOp (E, x) # x is expected to be sparse one-hot
}.f
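
The rewritten EmbeddingLayer always applies TimesOp (E, x), so its input x is now expected to be a sparse one-hot vector rather than a dense word index; with transpose=true the parameter is stored as [vocabDim x embedDim] (the layout of a text embedding file, the leading dimension inferred from the file), and TransposeTimes picks out the row belonging to the active index. A minimal usage sketch, assuming the vocabDim, embedDim and embeddingmatrix.txt names from the seqcla config changed later in this commit:

    t        = DynamicAxis{}
    features = SparseInput {vocabDim, dynamicAxis=t}   # one-hot word ids, one per time step
    embed    = EmbeddingLayer {embedDim, embeddingPath='embeddingmatrix.txt', transpose=true} (features)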

View File

@@ -251,6 +251,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
// only one criterion so far TODO: support multiple ones?
auto& learnableNodes = net->LearnableParameterNodes(criterionNodes[0]);
list<Matrix<ElemType>> smoothedGradients;
size_t numParameters = 0;
vector<wstring> nodesToUpdateDescriptions; // for logging only
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@@ -263,7 +264,10 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
node->Value().GetNumCols(),
net->GetDeviceId()));
if (node->IsParameterUpdateRequired())
nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : " + msra::strfun::utf16(string(node->GetSampleLayout())).c_str());
{
nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : [" + msra::strfun::utf16(string(node->GetSampleLayout())) + L"]");
numParameters += node->GetSampleLayout().GetNumElements();
}
}
size_t numNeedsGradient = 0;
for (let node : net->GetEvalOrder(criterionNodes[0]))
@@ -272,7 +276,8 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
numNeedsGradient++;
}
fprintf(stderr, "\n");
LOGPRINTF(stderr, "Training %d out of %d parameters and %d nodes with gradient:\n", (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
LOGPRINTF(stderr, "Training %.0f parameters in %d out of %d parameters and %d nodes with gradient:\n",
(double)numParameters, (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
for (let nodeDescription : nodesToUpdateDescriptions)
{
LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str());
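
The added numParameters counter totals GetNumElements() over the parameters that are actually updated, i.e. the product of each node's sample-layout dimensions: as a purely illustrative example, a [200 x 50] weight matrix contributes 200 * 50 = 10000 to the count, while a parameter with learningRateMultiplier = 0 (such as the fixed embedding in the seqcla config below) should be excluded, since IsParameterUpdateRequired() is false for it.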

View File

@@ -13,6 +13,8 @@ deviceId = $DeviceId$
modelPath="$ModelDir$/seqcla.dnn"
makeMode = false # set true to enable checkpointing
vocabDim = 2000
Train=[
action="train"
@@ -22,6 +24,10 @@ Train=[
embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
lookup = GatherPacked(features, embedding)
].lookup
EmbeddingLayerSparse(input, vocabSize, embeddingDim, embeddingPath) = [
embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
lookup = embedding * features
].lookup
DenseLayer(input, inputSize, outputSize, activation) = [
z = BFF(input, outputSize, inputSize).z
act = activation(z)
@@ -38,13 +44,14 @@ Train=[
// model dims
numLabels = 5
vocabDim = 2000
vocabDim = $vocabDim$
embedDim = 50
# definition without layer composition
modelMacroStyle (features) = {
// load the pre-learned word embedding matrix
l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
#l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
l1 = Layers.EmbeddingLayerSparse(features, vocabDim, embedDim, 'embeddingmatrix.txt')
l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
z = l3
@@ -53,7 +60,8 @@ Train=[
# definition with layer composition
modelLayerStyle (features) = {
// load the pre-learned word embedding matrix
l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
l1o = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
l1 = EmbeddingLayer {embedDim, embeddingPath='embeddingmatrix.txt', transpose=true} (features)
l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
z = l3
@@ -61,7 +69,7 @@ Train=[
# inputs
t = DynamicAxis{}
features = Input {1, dynamicAxis=t} # Input has shape (1,t)
features = SparseInput {$vocabDim$, dynamicAxis=t} # Input has shape (1,t)
labels = Input {numLabels} # Input has shape (numLabels,*) where all sequences in *=1
# apply model
@@ -92,22 +100,14 @@ Train=[
# We are testing checkpointing, keep all checkpoint (.ckp) files
keepCheckPointFiles = true
]
reader = [
readerType = "CNTKTextFormatReader"
file = "$DataDir$/Train.txt"
input = [
features=[
alias = "x"
dim = 1
format = "dense"
]
labels=[
alias = "y"
dim = 5
format = "dense"
]
#file = "$DataDir$/Train.txt"
file = "$DataDir$/Train.ctf"
input = [
features = [ alias = "x" ; dim = $vocabDim$ ; format = "sparse" ]
labels = [ alias = "y" ; dim = 5 ; format = "dense" ]
]
]
outputPath = "$OutputDir$/output.txt" # dump the output as text?
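
The reader now pulls the features from Train.ctf in CNTKTextFormat: "x" is a $vocabDim$-dimensional sparse input written as index:value pairs, and "y" a 5-dimensional dense label. A minimal sketch of what such a file looks like (the sequence ids, word indices and labels below are invented for illustration; the actual Train.ctf diff is suppressed further down because of its size):

    0 |x 17:1 |y 0 0 1 0 0
    0 |x 1042:1
    0 |x 233:1
    1 |x 5:1 |y 1 0 0 0 0
    1 |x 980:1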

File diff suppressed because it is too large.