converted seqcla to sparse input
Parent: 8bfd693b9b
Commit: 5cd5ec8842

CNTK.sln (2 changes):
@@ -934,7 +934,7 @@ EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA}"
 	ProjectSection(SolutionItems) = preProject
 		Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt
-		Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
+		Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf
 	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{181664AC-4C95-4798-A923-09B879215B33}"

BrainScript layers library:
@@ -42,13 +42,14 @@ LinearLayer {outDim} =
 DenseLayer{outDim, activation=(x=>x)} = Sequential ( LinearLayer{outDim} : activation )
 
 # EmbeddingLayer -- create a linear embedding layer
-EmbeddingLayer{outDim,                                  # dimension of embedding
-               embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
+EmbeddingLayer {outDim,                                  # dimension of embedding
+                embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
 {
+    shape = if transpose then (0 : outDim) else (outDim : 0)
     E = if embeddingPath == ''
-        then ParameterTensor {(outDim : 0), init='uniform'} # learnable
-        else ParameterTensor {(outDim : 0), initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
-    TimesOp = if embeddingPath != '' && transpose then TransposeTimes else Times
+        then ParameterTensor {shape, init='uniform'} # learnable
+        else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
+    TimesOp = if transpose then TransposeTimes else Times
     f(x) = TimesOp (E, x) # x is expected to be sparse one-hot
 }.f
 
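
To see the pieces together, here is a minimal sketch (BrainScript, using the seqcla test's dimensions from further down in this commit) of applying the reworked layer to sparse one-hot input:

    t = DynamicAxis{}
    features = SparseInput {2000, dynamicAxis=t}  # sparse one-hot word ids, shape (2000, t)
    # transpose=true: E keeps the file's [vocabDim x embedDim] layout, and
    # TransposeTimes (E, x) selects the row of E indexed by the one-hot x
    embed = EmbeddingLayer {50, embeddingPath='embeddingmatrix.txt', transpose=true}
    h = embed (features)                          # shape (50, t)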

SGD.cpp:
@@ -251,6 +251,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
     // only one criterion so far TODO: support multiple ones?
     auto& learnableNodes = net->LearnableParameterNodes(criterionNodes[0]);
     list<Matrix<ElemType>> smoothedGradients;
+    size_t numParameters = 0;
 
     vector<wstring> nodesToUpdateDescriptions; // for logging only
     for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@@ -263,7 +264,10 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
                                                  node->Value().GetNumCols(),
                                                  net->GetDeviceId()));
         if (node->IsParameterUpdateRequired())
-            nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : " + msra::strfun::utf16(string(node->GetSampleLayout())).c_str());
+        {
+            nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : [" + msra::strfun::utf16(string(node->GetSampleLayout())) + L"]");
+            numParameters += node->GetSampleLayout().GetNumElements();
+        }
     }
     size_t numNeedsGradient = 0;
     for (let node : net->GetEvalOrder(criterionNodes[0]))
@@ -272,7 +276,8 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
             numNeedsGradient++;
     }
     fprintf(stderr, "\n");
-    LOGPRINTF(stderr, "Training %d out of %d parameters and %d nodes with gradient:\n", (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
+    LOGPRINTF(stderr, "Training %.0f parameters in %d out of %d parameters and %d nodes with gradient:\n",
+              (double)numParameters, (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
     for (let nodeDescription : nodesToUpdateDescriptions)
     {
         LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str());
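
As a sanity check on the new tally: GetNumElements() multiplies out a tensor's sample layout, so an illustrative [200 x 50] weight (not a shape taken from this test) would contribute 200 * 50 = 10,000 to numParameters, and printing the sum through %.0f as a double keeps the log correct even when the total exceeds int range.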

seqcla test configuration:
@@ -13,6 +13,8 @@ deviceId = $DeviceId$
 modelPath="$ModelDir$/seqcla.dnn"
 makeMode = false # set true to enable checkpointing
 
+vocabDim = 2000
+
 Train=[
     action="train"
 
@@ -22,6 +24,10 @@ Train=[
         embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
         lookup = GatherPacked(features, embedding)
     ].lookup
+    EmbeddingLayerSparse(input, vocabSize, embeddingDim, embeddingPath) = [
+        embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
+        lookup = embedding * features
+    ].lookup
     DenseLayer(input, inputSize, outputSize, activation) = [
         z = BFF(input, outputSize, inputSize).z
         act = activation(z)
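
The sparse variant relies on the multiply itself doing the lookup: the reader now delivers features as one-hot columns, so for a column x that is 1 at word index i and 0 elsewhere,

    (embedding * x)[j] = sum_k embedding[j,k] * x[k] = embedding[j,i]

i.e. the dense-times-sparse product reads a single column of the [embeddingDim x vocabDim] matrix, which replaces the GatherPacked row indexing used for the dense word-id input above.
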
@@ -38,13 +44,14 @@ Train=[
 
     // model dims
     numLabels = 5
-    vocabDim = 2000
+    vocabDim = $vocabDim$
     embedDim = 50
 
     # definition without layer composition
     modelMacroStyle (features) = {
         // load the pre-learned word embedding matrix
-        l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        #l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1 = Layers.EmbeddingLayerSparse(features, vocabDim, embedDim, 'embeddingmatrix.txt')
         l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
         l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
         z = l3
@@ -53,7 +60,8 @@ Train=[
     # definition with layer composition
     modelLayerStyle (features) = {
         // load the pre-learned word embedding matrix
-        l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1o = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1 = EmbeddingLayer {embedDim, embeddingPath='embeddingmatrix.txt', transpose=true} (features)
         l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
         l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
         z = l3
@@ -61,7 +69,7 @@ Train=[
 
     # inputs
     t = DynamicAxis{}
-    features = Input {1, dynamicAxis=t} # Input has shape (1,t)
+    features = SparseInput {$vocabDim$, dynamicAxis=t} # Input has shape ($vocabDim$,t)
     labels = Input {numLabels} # Input has shape (numLabels,*) where all sequences in *=1
 
     # apply model
@@ -92,22 +100,14 @@ Train=[
         # We are testing checkpointing, keep all checkpoint (.ckp) files
         keepCheckPointFiles = true
     ]
 
     reader = [
         readerType = "CNTKTextFormatReader"
-        file = "$DataDir$/Train.txt"
-
-        input = [
-            features=[
-                alias = "x"
-                dim = 1
-                format = "dense"
-            ]
-            labels=[
-                alias = "y"
-                dim = 5
-                format = "dense"
-            ]
+        #file = "$DataDir$/Train.txt"
+        file = "$DataDir$/Train.ctf"
+        input = [
+            features = [ alias = "x" ; dim = $vocabDim$ ; format = "sparse" ]
+            labels = [ alias = "y" ; dim = 5 ; format = "dense" ]
         ]
     ]
     outputPath = "$OutputDir$/output.txt" # dump the output as text?
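
For reference, a sketch of Train.ctf lines matching this input section (word ids and labels invented for illustration, not taken from the actual test data):

    0	|x 42:1	|y 1 0 0 0 0
    0	|x 7:1
    0	|x 139:1
    1	|x 6:1	|y 0 0 1 0 0
    1	|x 88:1

The leading number is the sequence id (one line per token); the sparse "x" stream encodes each one-hot as a single index:value pair, and the dense "y" label appears once per sequence.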
One file's diff is not shown because of its size.