converted seqcla to sparse input
Parent: 8bfd693b9b
Commit: 5cd5ec8842

CNTK.sln (2 changes):
@@ -934,7 +934,7 @@ EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Data", "Data", "{D11F76CC-DB6D-4CB4-B3B7-AB139DE2F5FA}"
 	ProjectSection(SolutionItems) = preProject
 		Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\embeddingmatrix.txt
-		Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.txt
+		Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf = Tests\EndToEndTests\Text\SequenceClassification\Data\Train.ctf
 	EndProjectSection
 EndProject
 Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "SLU", "SLU", "{181664AC-4C95-4798-A923-09B879215B33}"

BrainScript layers library:
@@ -42,13 +42,14 @@ LinearLayer {outDim} =
 DenseLayer{outDim, activation=(x=>x)} = Sequential ( LinearLayer{outDim} : activation )
 
 # EmbeddingLayer -- create a linear embedding layer
-EmbeddingLayer{outDim,                                  # dimension of embedding
-               embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
+EmbeddingLayer {outDim,                                  # dimension of embedding
+                embeddingPath = '', transpose = false} = # load a fixed embedding from a path instead
 {
+    shape = if transpose then (0 : outDim) else (outDim : 0)
     E = if embeddingPath == ''
-        then ParameterTensor {(outDim : 0), init='uniform'} # learnable
-        else ParameterTensor {(outDim : 0), initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
-    TimesOp = if embeddingPath != '' && transpose then TransposeTimes else Times
+        then ParameterTensor {shape, init='uniform'} # learnable
+        else ParameterTensor {shape, initFromFilePath = embeddingPath, learningRateMultiplier = 0} # fixed from file
+    TimesOp = if transpose then TransposeTimes else Times
     f(x) = TimesOp (E, x) # x is expected to be sparse one-hot
 }.f
 
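
To see the pieces together, here is a minimal sketch (BrainScript, using the seqcla test's dimensions from further down in this commit) of applying the reworked layer to sparse one-hot input:

    t = DynamicAxis{}
    features = SparseInput {2000, dynamicAxis=t}  # sparse one-hot word ids, shape (2000, t)
    # transpose=true: E keeps the file's [vocabDim x embedDim] layout, and
    # TransposeTimes (E, x) selects the row of E indexed by the one-hot x
    embed = EmbeddingLayer {50, embeddingPath='embeddingmatrix.txt', transpose=true}
    h = embed (features)                          # shape (50, t)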

SGD.cpp:
@@ -251,6 +251,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
     // only one criterion so far TODO: support multiple ones?
     auto& learnableNodes = net->LearnableParameterNodes(criterionNodes[0]);
     list<Matrix<ElemType>> smoothedGradients;
+    size_t numParameters = 0;
 
     vector<wstring> nodesToUpdateDescriptions; // for logging only
     for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@@ -263,7 +264,10 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
                                                  node->Value().GetNumCols(),
                                                  net->GetDeviceId()));
         if (node->IsParameterUpdateRequired())
-            nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : " + msra::strfun::utf16(string(node->GetSampleLayout())).c_str());
+        {
+            nodesToUpdateDescriptions.push_back(node->NodeDescription() + L" : [" + msra::strfun::utf16(string(node->GetSampleLayout())) + L"]");
+            numParameters += node->GetSampleLayout().GetNumElements();
+        }
     }
     size_t numNeedsGradient = 0;
     for (let node : net->GetEvalOrder(criterionNodes[0]))
@@ -272,7 +276,8 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
             numNeedsGradient++;
     }
     fprintf(stderr, "\n");
-    LOGPRINTF(stderr, "Training %d out of %d parameters and %d nodes with gradient:\n", (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
+    LOGPRINTF(stderr, "Training %.0f parameters in %d out of %d parameters and %d nodes with gradient:\n",
+              (double)numParameters, (int)nodesToUpdateDescriptions.size(), (int)learnableNodes.size(), (int)numNeedsGradient);
     for (let nodeDescription : nodesToUpdateDescriptions)
     {
         LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str());
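
As a sanity check on the new tally: GetNumElements() multiplies out a tensor's sample layout, so an illustrative [200 x 50] weight (not a shape taken from this test) would contribute 200 * 50 = 10,000 to numParameters, and printing the sum through %.0f as a double keeps the log correct even when the total exceeds int range.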

seqcla test configuration:
@@ -13,6 +13,8 @@ deviceId = $DeviceId$
 modelPath="$ModelDir$/seqcla.dnn"
 makeMode = false # set true to enable checkpointing
 
+vocabDim = 2000
+
 Train=[
     action="train"
 
@@ -22,6 +24,10 @@ Train=[
         embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
         lookup = GatherPacked(features, embedding)
     ].lookup
+    EmbeddingLayerSparse(input, vocabSize, embeddingDim, embeddingPath) = [
+        embedding = Transpose(LearnableParameter(vocabSize, embeddingDim, learningRateMultiplier = 0.0, init = 'fromFile', initFromFilePath = embeddingPath))
+        lookup = embedding * features
+    ].lookup
     DenseLayer(input, inputSize, outputSize, activation) = [
         z = BFF(input, outputSize, inputSize).z
         act = activation(z)
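
The sparse variant relies on the multiply itself doing the lookup: the reader now delivers features as one-hot columns, so for a column x that is 1 at word index i and 0 elsewhere,

    (embedding * x)[j] = sum_k embedding[j,k] * x[k] = embedding[j,i]

i.e. the dense-times-sparse product reads a single column of the [embeddingDim x vocabDim] matrix, which replaces the GatherPacked row indexing used for the dense word-id input above.
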
@@ -38,13 +44,14 @@ Train=[
 
     // model dims
     numLabels = 5
-    vocabDim = 2000
+    vocabDim = $vocabDim$
     embedDim = 50
 
     # definition without layer composition
     modelMacroStyle (features) = {
         // load the pre-learned word embedding matrix
-        l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        #l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1 = Layers.EmbeddingLayerSparse(features, vocabDim, embedDim, 'embeddingmatrix.txt')
         l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
         l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
         z = l3
@@ -53,7 +60,8 @@ Train=[
     # definition with layer composition
     modelLayerStyle (features) = {
         // load the pre-learned word embedding matrix
-        l1 = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1o = Layers.EmbeddingLayer(features, vocabDim, embedDim, 'embeddingmatrix.txt')
+        l1 = EmbeddingLayer {embedDim, embeddingPath='embeddingmatrix.txt', transpose=true} (features)
         l2 = Layers.LSTMLayer(l1, embedDim, lstmDim, cellDim, BS.Sequences.Last)
         l3 = Layers.DenseLayer(l2, lstmDim, numLabels, Pass)
         z = l3
@@ -61,7 +69,7 @@ Train=[
 
     # inputs
     t = DynamicAxis{}
-    features = Input {1, dynamicAxis=t} # Input has shape (1,t)
+    features = SparseInput {$vocabDim$, dynamicAxis=t} # Input has shape ($vocabDim$,t)
     labels = Input {numLabels} # Input has shape (numLabels,*) where all sequences in *=1
 
     # apply model
@@ -92,22 +100,14 @@ Train=[
         # We are testing checkpointing, keep all checkpoint (.ckp) files
         keepCheckPointFiles = true
     ]
 
     reader = [
         readerType = "CNTKTextFormatReader"
-        file = "$DataDir$/Train.txt"
-
-        input = [
-            features=[
-                alias = "x"
-                dim = 1
-                format = "dense"
-            ]
-            labels=[
-                alias = "y"
-                dim = 5
-                format = "dense"
-            ]
+        #file = "$DataDir$/Train.txt"
+        file = "$DataDir$/Train.ctf"
+        input = [
+            features = [ alias = "x" ; dim = $vocabDim$ ; format = "sparse" ]
+            labels = [ alias = "y" ; dim = 5 ; format = "dense" ]
         ]
     ]
     outputPath = "$OutputDir$/output.txt" # dump the output as text?
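
For reference, a sketch of Train.ctf lines matching this input section (word ids and labels invented for illustration, not taken from the actual test data):

    0	|x 42:1	|y 1 0 0 0 0
    0	|x 7:1
    0	|x 139:1
    1	|x 6:1	|y 0 0 1 0 0
    1	|x 88:1

The leading number is the sequence id (one line per token); the sparse "x" stream encodes each one-hot as a single index:value pair, and the dense "y" label appears once per sequence.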
One file's diff is not shown because of its size.