From 0db167f5084227b7e754ecb94230f77719a0f508 Mon Sep 17 00:00:00 2001
From: Thilo Will
Date: Fri, 23 Sep 2016 16:50:54 +0200
Subject: [PATCH] removing tabs.

---
 .../CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs | 46 +++++++++----------
 .../ComputationNetwork.cpp                    | 24 +++++-----
 .../ComputationNetwork.h                      |  8 ++--
 .../LinearAlgebraNodes.h                      |  5 +-
 Source/Math/Matrix.cpp                        | 24 ++++++----
 Source/SGDLib/SGD.cpp                         |  2 +-
 6 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs b/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
index b03ad7823..30b88eb26 100644
--- a/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
+++ b/Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
@@ -463,32 +463,30 @@ CNTK2 = [
     CrossEntropyWithSoftmax_new (L, z, tag='') = Minus (ReduceLogSum (z), TransposeTimes (L, z), tag=tag)
     ClassificationError_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
 
-    CrossEntropyWithSampledSoftmax(hiddenLayer /* Vector of dimension nHidden */,
-        labels /* One-hot for the true class (labels). Dimension: nClasses */,
+    CrossEntropyWithSampledSoftmax(hiddenLayer /* Vector of dimension nHidden */,
+        labels /* One-hot for the true class (labels). Dimension: nClasses */,
         randomSampleSelector /* Sparse matrix of dimension nClasses * nSamples */,
         weights /* nClasses * nHidden */,
         bias /* Biases for logit computation. Dimension nClasses */,
         logInclusionProb /* Inclusion probabilities (derived from sampling weights) */
     ) = [
-        # Getting the weights matrix wS in the subspace of samples. Dimension: nHidden * nSamples
-        wS = TransposeTimes(weights, randomSampleSelector)
+        # Getting the weights matrix wS in the subspace of samples. Dimension: nHidden * nSamples
+        wS = TransposeTimes(weights, randomSampleSelector)
         zS = TransposeTimes(wS, hiddenLayer) + TransposeTimes(randomSampleSelector, bias - logInclusionProb)
 
-        # Getting the weight vector for the true label. Dimension nHidden
+        # Getting the weight vector for the true label. Dimension nHidden
         wT = TransposeTimes(weights, labels)
         zT = TransposeTimes(wT, hiddenLayer) + TransposeTimes(labels, bias - logInclusionProb)
 
         zSReduced = ReduceLogSum(zS)
-
-        # The label (true class) might already be among the sampled classes.
-        # To get the 'partition function' over the union of label and sampled classes,
-        # we need to LogPlus zT if the label is not among the sampled classes.
-        labelIsInSampled = ReduceSum(TransposeTimes(labels, randomSampleSelector))
-#       logSum = BS.Boolean.If(labelIsInSampled, zSReduced, LogPlus(zT, zSReduced))
+
+        # The label (true class) might already be among the sampled classes.
+        # To get the 'partition function' over the union of label and sampled classes,
+        # we need to LogPlus zT if the label is not among the sampled classes.
+        labelIsInSampled = ReduceSum(TransposeTimes(labels, randomSampleSelector))
+        logSum = BS.Boolean.If(labelIsInSampled, zSReduced, LogPlus(zT, zSReduced))
 
-#       ce = logSum - zT
-
-        ce = LogPlus(zT, zSReduced) - zT
+        ce = logSum - zT
     ].ce
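Note that this hunk is more than whitespace cleanup: it activates the previously commented-out BS.Boolean.If path and drops the unconditional ce = LogPlus(zT, zSReduced) - zT, so the true class's logit enters the partition function only when the sampler did not already draw that class (otherwise it would be counted twice). Below is a minimal C++ sketch of the resulting computation on plain vectors; all names are illustrative, not CNTK APIs, and zS/zT are assumed to already carry the bias - logInclusionProb correction applied above:

    #include <algorithm>
    #include <cmath>
    #include <vector>

    // Stable log(exp(a) + exp(b)), mirroring BrainScript's LogPlus.
    double LogPlus(double a, double b)
    {
        double hi = std::max(a, b), lo = std::min(a, b);
        return hi + std::log1p(std::exp(lo - hi));
    }

    // zS: corrected logits of the sampled classes (assumed non-empty),
    // zT: corrected logit of the true class,
    // labelIsInSampled: true if the sampler drew the true class.
    double SampledSoftmaxCE(const std::vector<double>& zS, double zT, bool labelIsInSampled)
    {
        // ReduceLogSum over the sampled logits.
        double zSReduced = zS.front();
        for (size_t i = 1; i < zS.size(); i++)
            zSReduced = LogPlus(zSReduced, zS[i]);

        // 'Partition function' over the union of true and sampled classes:
        // LogPlus in zT only if the true class was not among the samples.
        double logSum = labelIsInSampled ? zSReduced : LogPlus(zT, zSReduced);
        return logSum - zT; // cross entropy of the true class
    }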
@@ -500,21 +498,21 @@ CNTK2 = [
     NotEqual(_, y, tag='') = new ComputationNode [ operation = 'NotEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
     LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
 
-    // 13. Others
-    Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
+    // 13. Others
+    Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
     Identity = Pass
 
-    GetRandomSample(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
-        operation = 'RandomSample' ;
-        nSamples = numSamples;
+    GetRandomSample(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
+        operation = 'RandomSample' ;
+        nSamples = numSamples;
         allowDuplicates = sampleWithReplacement;
-        estimateInclusionProbs = false;
+        estimateInclusionProbs = false;
         inputs = _ /*plus the function args*/ ]
 
-    GetInclusionProb(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
-        operation = 'RandomSample' ;
-        nSamples = numSamples;
+    GetInclusionProb(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
+        operation = 'RandomSample' ;
+        nSamples = numSamples;
         allowDuplicates = sampleWithReplacement;
-        estimateInclusionProbs = true;
+        estimateInclusionProbs = true;
         inputs = _ /*plus the function args*/ ]
 ]
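GetRandomSample and GetInclusionProb wrap the same 'RandomSample' operation and differ only in estimateInclusionProbs: the former emits sampled classes, the latter the per-class inclusion probabilities whose logarithm CrossEntropyWithSampledSoftmax subtracts from the logits. As a rough sketch of what such an estimate can look like, the snippet below uses the standard formulas for weighted sampling; this is an assumption for illustration, not necessarily the exact estimate the RandomSample node computes:

    #include <cmath>
    #include <vector>

    // Hypothetical estimate of per-class inclusion probabilities for
    // nSamples draws from a weighted distribution.
    std::vector<double> EstimateInclusionProbs(const std::vector<double>& weights,
                                               size_t nSamples, bool allowDuplicates)
    {
        double total = 0;
        for (double w : weights)
            total += w;

        std::vector<double> probs(weights.size());
        for (size_t i = 0; i < weights.size(); i++)
        {
            double p = weights[i] / total; // per-draw probability of class i
            // With replacement: expected number of occurrences in nSamples draws.
            // Without replacement: chance of being drawn at least once,
            // approximated as if the draws were independent.
            probs[i] = allowDuplicates ? nSamples * p
                                       : 1.0 - std::pow(1.0 - p, (double)nSamples);
        }
        return probs;
    }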
diff --git a/Source/ComputationNetworkLib/ComputationNetwork.cpp b/Source/ComputationNetworkLib/ComputationNetwork.cpp
index 8f247210b..a12f8d993 100644
--- a/Source/ComputationNetworkLib/ComputationNetwork.cpp
+++ b/Source/ComputationNetworkLib/ComputationNetwork.cpp
@@ -553,19 +553,19 @@ template <class ElemType>
 /* static */ void ComputationNetwork::SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase)
 {
-    // BUGBUG: Here we have code duplication with SetDropoutRate(...). Remove this by using SetDropoutRate for the drop rate and setting random seeds for both RandomSampleNode and
-    // DropoutNode here.
-    list<ComputationNodeBasePtr> randomSampleNodes = net->GetNodesWithType(OperationNameOf(RandomSampleNode), criterionNode);
+    // BUGBUG: Here we have code duplication with SetDropoutRate(...). Remove this by using SetDropoutRate for the drop rate and setting random seeds for both RandomSampleNode and
+    // DropoutNode here.
+    list<ComputationNodeBasePtr> randomSampleNodes = net->GetNodesWithType(OperationNameOf(RandomSampleNode), criterionNode);
 
-    // Each RandomSampleNode gets a distinct seed, computed as follows:
-    // seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * randomSampleNodes.size()) + nodeIdx.
-    size_t randSeed = randSeedBase * randomSampleNodes.size();
-    for (auto& nodeIter : randomSampleNodes)
-    {
-        auto node = dynamic_pointer_cast<RandomSampleNode<ElemType>>(nodeIter);
-        node->SetRandomSeed(randSeed);
-        randSeed++;
-    }
+    // Each RandomSampleNode gets a distinct seed, computed as follows:
+    // seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * randomSampleNodes.size()) + nodeIdx.
+    size_t randSeed = randSeedBase * randomSampleNodes.size();
+    for (auto& nodeIter : randomSampleNodes)
+    {
+        auto node = dynamic_pointer_cast<RandomSampleNode<ElemType>>(nodeIter);
+        node->SetRandomSeed(randSeed);
+        randSeed++;
+    }
 }

diff --git a/Source/ComputationNetworkLib/ComputationNetwork.h b/Source/ComputationNetworkLib/ComputationNetwork.h
index e926a13ab..f4684b6d2 100644
--- a/Source/ComputationNetworkLib/ComputationNetwork.h
+++ b/Source/ComputationNetworkLib/ComputationNetwork.h
@@ -448,10 +448,10 @@ public:
     template <class ElemType>
     static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
 
-    template <class ElemType>
-    static void SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
-
-    template <class ElemType>
+    template <class ElemType>
+    static void SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
+
+    template <class ElemType>
     static void SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
                                                    double normalizationTimeConstant, double& prevNormalizationTimeConstant,
                                                    double blendTimeConstant, double& prevBlendTimeConstant);
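The seeding scheme mirrors the dropout logic referenced in the BUGBUG comment: randSeedBase, computed in SGD.cpp below as (parallelWorkerIdx * maxEpochs) + epoch, is scaled by the node count and offset by the node index, so every (worker, epoch, node) triple receives a distinct seed. A small self-contained illustration of the arithmetic (values chosen arbitrarily):

    #include <cstddef>
    #include <cstdio>

    int main()
    {
        size_t maxEpochs = 10, numNodes = 2; // example values
        for (size_t worker = 0; worker < 2; worker++)
            for (size_t epoch = 0; epoch < 2; epoch++)
            {
                size_t randSeedBase = (worker * maxEpochs) + epoch; // as in SGD.cpp
                size_t randSeed = randSeedBase * numNodes;          // as in SetRandomSampleNodeSeed
                for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++)
                    std::printf("worker %zu epoch %zu node %zu -> seed %zu\n",
                                worker, epoch, nodeIdx, randSeed + nodeIdx);
            }
        return 0;
    }

Worker 0 gets seeds 0,1 (epoch 0) and 2,3 (epoch 1); worker 1 starts at 20, so the ranges stay disjoint as long as the epoch index is below maxEpochs.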
diff --git a/Source/ComputationNetworkLib/LinearAlgebraNodes.h b/Source/ComputationNetworkLib/LinearAlgebraNodes.h
index 59c083573..3dd3c9005 100644
--- a/Source/ComputationNetworkLib/LinearAlgebraNodes.h
+++ b/Source/ComputationNetworkLib/LinearAlgebraNodes.h
@@ -306,7 +306,7 @@ public:
             return;
         }
 
-        Value().SwitchToMatrixType(DENSE, MatrixFormat::matrixFormatDense, false);
+        Value().SwitchToMatrixType(DENSE, MatrixFormat::matrixFormatDense, false);
 
         // TensorView::DoMatrixProductOf() will reduce each tensor object into a 2D tensor (or fail if it cannot)
         // and recreate actual Matrix objects (in case of sparse, they must be identical to the original tensor storage object).
@@ -314,8 +314,7 @@ public:
         auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
         auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
         auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);
-
-        output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
+        output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
     }
 
     virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override

diff --git a/Source/Math/Matrix.cpp b/Source/Math/Matrix.cpp
index 37ff14217..26ef571bd 100644
--- a/Source/Math/Matrix.cpp
+++ b/Source/Math/Matrix.cpp
@@ -4429,20 +4429,28 @@ void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix
     else if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
     {
-        if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
+        if ( b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
         {
             CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
             c.SetDataLocation(CPU, DENSE);
         }
-        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
-        {
+        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
+        {
             NOT_IMPLEMENTED;
         }
-        else
-        {
-            NOT_IMPLEMENTED;
-        }
-    }
+        else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * DENSE -> SPARSE
+        {
+            NOT_IMPLEMENTED;
+        }
+        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * SPARSE -> SPARSE
+        {
+            NOT_IMPLEMENTED;
+        }
+        else
+        {
+            NOT_IMPLEMENTED;
+        }
+    }
     else // CPU, DENSE * ANY -> ANY
     {
         if (b.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> ANY

diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp
index 4ba2e66d3..b72568434 100644
--- a/Source/SGDLib/SGD.cpp
+++ b/Source/SGDLib/SGD.cpp
@@ -405,7 +405,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
         size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UsingParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
         size_t dropoutRandSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
         ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropoutRandSeedBase);
-        ComputationNetwork::SetRandomSampleNodeSeed<ElemType>(net, criterionNodes[0], dropoutRandSeedBase);
+        ComputationNetwork::SetRandomSampleNodeSeed<ElemType>(net, criterionNodes[0], dropoutRandSeedBase);
         ComputationNetwork::SetBatchNormalizationTimeConstants<ElemType>(net, criterionNodes[0],
                                                                          m_batchNormalizationTimeConstant[i], prevNormalizationTimeConstant,
                                                                          m_batchNormalizationBlendTimeConstant[i], prevNormalizationBlendTimeConstant);
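Of the CPU SPARSE * ANY combinations enumerated in the Matrix.cpp hunk, only SPARSE * DENSE -> DENSE is actually implemented; the patch spells out the remaining combinations as separate NOT_IMPLEMENTED branches instead of one catch-all else, which makes it obvious from a stack trace which combination was hit. A self-contained sketch of the one supported case, using a CSC layout (illustrative only, not CNTK's CPUSparseMatrix):

    #include <cstddef>
    #include <vector>

    // Column-major dense matrix.
    struct Dense
    {
        size_t rows, cols;
        std::vector<double> v; // rows * cols values
        double& at(size_t r, size_t c) { return v[c * rows + r]; }
    };

    // Compressed-sparse-column matrix; colPtr has cols + 1 entries.
    struct SparseCSC
    {
        size_t rows, cols;
        std::vector<double> val;
        std::vector<size_t> rowIdx, colPtr;
    };

    // C = alpha * A * B + beta * C for sparse A and dense B/C, i.e. the one
    // CPU SPARSE * DENSE -> DENSE case implemented in the dispatch above.
    void MultiplyAndWeightedAdd(double alpha, const SparseCSC& A, const Dense& B,
                                double beta, Dense& C)
    {
        for (double& x : C.v)
            x *= beta;
        for (size_t k = 0; k < A.cols; k++)  // column k of A meets row k of B
            for (size_t i = A.colPtr[k]; i < A.colPtr[k + 1]; i++)
                for (size_t n = 0; n < B.cols; n++)
                    C.at(A.rowIdx[i], n) += alpha * A.val[i] * B.v[n * B.rows + k];
    }

Iterating over the nonzeros of A and scattering into dense C is what makes the dense-result case cheap; producing a sparse C from the same loop would require dynamic insertion, which is why those branches remain NOT_IMPLEMENTED.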