removing tabs.

Parent 658461ab5c
Commit 0db167f508
@@ -463,32 +463,30 @@ CNTK2 = [
     CrossEntropyWithSoftmax_new (L, z, tag='') = Minus (ReduceLogSum (z), TransposeTimes (L, z), tag=tag)
     ClassificationError_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)

     CrossEntropyWithSampledSoftmax(hiddenLayer /* Vector of dimension nHidden */,
                                    labels /* One-hot for the true class (labels). Dimension: nClasses */,
                                    randomSampleSelector /* Sparse matrix of dimension nClasses * nSamples */,
                                    weights /* nClasses * nHidden */,
                                    bias /* Biases for logit computation. Dimension nClasses */,
                                    logInclusionProb /* Inclusion probabilities (derived from sampling weights) */
     ) = [
         # Getting the weights matrix wS in the subspace of samples. Dimension: nHidden * nSamples
         wS = TransposeTimes(weights, randomSampleSelector)
         zS = TransposeTimes(wS, hiddenLayer) + TransposeTimes(randomSampleSelector, bias - logInclusionProb)

         # Getting the weight vector for the true label. Dimension: nHidden
         wT = TransposeTimes(weights, labels)
         zT = TransposeTimes(wT, hiddenLayer) + TransposeTimes(labels, bias - logInclusionProb)

         zSReduced = ReduceLogSum(zS)

         # The label (true class) might already be among the sampled classes.
         # To get the 'partition function' over the union of label and sampled classes
         # we need to LogPlus zT if the label is not among the sampled classes.
         labelIsInSampled = ReduceSum(TransposeTimes(labels, randomSampleSelector))
-        # logSum = BS.Boolean.If(labelIsInSampled, zSReduced, LogPlus(zT, zSReduced))
+        logSum = BS.Boolean.If(labelIsInSampled, zSReduced, LogPlus(zT, zSReduced))

-        # ce = logSum - zT
-        ce = LogPlus(zT, zSReduced) - zT
+        ce = logSum - zT
     ].ce
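Note on the criterion above: writing h for hiddenLayer, t for the true class, S for the set of sampled classes, and q_j for the inclusion probability of class j (our notation, not identifiers from the diff), the corrected logits and loss the BrainScript computes are, as a sketch:

$$ \tilde z_j = w_j^\top h + b_j - \log q_j, \qquad \mathrm{ce} = \log \sum_{j \in S \cup \{t\}} e^{\tilde z_j} \; - \; \tilde z_t. $$

The BS.Boolean.If guard keeps the true class from being counted twice in the partition sum when it already appears in S, and subtracting log q_j is the standard correction that makes the sampled sum behave like an estimate of the full-softmax partition function.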
@@ -500,21 +498,21 @@ CNTK2 = [
     NotEqual(_, y, tag='') = new ComputationNode [ operation = 'NotEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
     LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]

     // 13. Others
     Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
     Identity = Pass

     GetRandomSample(_, numSamples, sampleWithReplacement, tag='') = new ComputationNode [
         operation = 'RandomSample' ;
         nSamples = numSamples;
         allowDuplicates = sampleWithReplacement;
         estimateInclusionProbs = false;
         inputs = _ /*plus the function args*/ ]

     GetInclusionProb(_, numSamples, sampleWithReplacement, tag='') = new ComputationNode [
         operation = 'RandomSample' ;
         nSamples = numSamples;
         allowDuplicates = sampleWithReplacement;
         estimateInclusionProbs = true;
         inputs = _ /*plus the function args*/ ]
 ]
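Both wrappers map onto the same RandomSample node; only estimateInclusionProbs differs. For sampling with replacement there is a closed form relating sampling weights to inclusion probabilities, sketched below (InclusionProbs is our illustrative helper, not CNTK code, and the node's actual estimator may differ):

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // For n independent draws with replacement from normalized weights p,
    // class i is included with probability 1 - (1 - p_i)^n.
    std::vector<double> InclusionProbs(const std::vector<double>& weights, std::size_t n)
    {
        double total = 0.0;
        for (double w : weights)
            total += w;
        std::vector<double> probs(weights.size());
        for (std::size_t i = 0; i < weights.size(); ++i)
            probs[i] = 1.0 - std::pow(1.0 - weights[i] / total, static_cast<double>(n));
        return probs;
    }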
@@ -553,19 +553,19 @@ template <class ElemType>
 template <class ElemType>
 /* static */ void ComputationNetwork::SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase)
 {
     // BUGBUG: This duplicates code in SetDropoutRate(...). Remove the duplication by using SetDropoutRate for the dropout rate
     // and setting the random seeds for both RandomSampleNode and DropoutNode here.
     list<ComputationNodeBasePtr> randomSampleNodes = net->GetNodesWithType(OperationNameOf(RandomSampleNode), criterionNode);

     // Each RandomSampleNode gets a distinct seed. The actual seed for each node is computed as follows:
     // seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * randomSampleNodes.size()) + nodeIdx.
     size_t randSeed = randSeedBase * randomSampleNodes.size();
     for (auto& nodeIter : randomSampleNodes)
     {
         auto node = dynamic_pointer_cast<RandomSampleNode<ElemType>>(nodeIter);
         node->SetRandomSeed(randSeed);
         randSeed++;
     }
 }
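Spelled out as a standalone helper (a sketch following the naming in the comment above, not a CNTK API), the seed schedule gives every (worker, epoch, node) combination its own random stream:

    #include <cstddef>

    // seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) * numNodes) + nodeIdx.
    // Distinct workers, epochs, and nodes cannot collide as long as
    // currentEpochNum < maxEpochs and nodeIdx < numNodes.
    std::size_t RandomSampleSeed(std::size_t parallelWorkerIdx, std::size_t maxEpochs,
                                 std::size_t currentEpochNum, std::size_t numNodes,
                                 std::size_t nodeIdx)
    {
        std::size_t randSeedBase = (parallelWorkerIdx * maxEpochs) + currentEpochNum;
        return randSeedBase * numNodes + nodeIdx;
    }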
@@ -448,10 +448,10 @@ public:
     template <class ElemType>
     static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);

     template <class ElemType>
     static void SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);

     template <class ElemType>
     static void SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
                                                    double normalizationTimeConstant, double& prevNormalizationTimeConstant,
                                                    double blendTimeConstant, double& prevBlendTimeConstant);
@@ -306,7 +306,7 @@ public:
         return;
     }

     Value().SwitchToMatrixType(DENSE, MatrixFormat::matrixFormatDense, false);

     // TensorView::DoMatrixProductOf() will reduce each tensor object into a 2D tensor (or fail if it cannot)
     // and recreate actual Matrix objects (in case of sparse, they must be identical to the original tensor storage object).
@@ -314,8 +314,7 @@ public:
     auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
     auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
     auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);

     output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
 }

 virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
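For reference, the product AssignMatrixProductOf performs here when m_transpose is set (as in TransposeTimes) is C = transpose(A) * B. A plain dense sketch of those semantics (our code, not the TensorView implementation; assumes non-empty inputs):

    #include <cstddef>
    #include <vector>

    using Mat = std::vector<std::vector<float>>;

    // C[i][j] = sum_k A[k][i] * B[k][j].
    // A is K x M, B is K x N, so the result C is M x N.
    Mat TransposeTimesRef(const Mat& A, const Mat& B)
    {
        const std::size_t K = A.size(), M = A[0].size(), N = B[0].size();
        Mat C(M, std::vector<float>(N, 0.0f));
        for (std::size_t k = 0; k < K; ++k)
            for (std::size_t i = 0; i < M; ++i)
                for (std::size_t j = 0; j < N; ++j)
                    C[i][j] += A[k][i] * B[k][j];
        return C;
    }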
@@ -4429,20 +4429,28 @@ void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemT
 {
     if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
     {
-        if ( b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
+        if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
         {
             CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
             c.SetDataLocation(CPU, DENSE);
         }
         else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
         {
             NOT_IMPLEMENTED;
         }
+        else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * DENSE -> SPARSE
+        {
+            NOT_IMPLEMENTED;
+        }
+        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * SPARSE -> SPARSE
+        {
+            NOT_IMPLEMENTED;
+        }
         else
         {
             NOT_IMPLEMENTED;
         }
     }
     else // CPU, DENSE * ANY -> ANY
     {
         if (b.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> ANY
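Every branch above shares one contract: c = alpha * op(a) * op(b) + beta * c, where op is an optional transpose; the function only dispatches on storage format and delegates to a kernel with these semantics (or hits NOT_IMPLEMENTED). A dense reference sketch of that contract, without transposes for brevity (our code, not a CNTK kernel):

    #include <cstddef>
    #include <vector>

    // c = alpha * a * b + beta * c for row-major dense matrices,
    // a: M x K, b: K x N, c: M x N (all flattened row-major).
    void MultiplyAndWeightedAddRef(float alpha, const std::vector<float>& a,
                                   const std::vector<float>& b, float beta,
                                   std::vector<float>& c,
                                   std::size_t M, std::size_t K, std::size_t N)
    {
        for (std::size_t i = 0; i < M; ++i)
            for (std::size_t j = 0; j < N; ++j)
            {
                float dot = 0.0f;
                for (std::size_t k = 0; k < K; ++k)
                    dot += a[i * K + k] * b[k * N + j];
                c[i * N + j] = alpha * dot + beta * c[i * N + j];
            }
    }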
@@ -405,7 +405,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
     size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UsingParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
     size_t dropoutRandSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
     ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropoutRandSeedBase);
     ComputationNetwork::SetRandomSampleNodeSeed<ElemType>(net, criterionNodes[0], dropoutRandSeedBase);
     ComputationNetwork::SetBatchNormalizationTimeConstants<ElemType>(net, criterionNodes[0],
                                                                      m_batchNormalizationTimeConstant[i], prevNormalizationTimeConstant,
                                                                      m_batchNormalizationBlendTimeConstant[i], prevNormalizationBlendTimeConstant);
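Worked example (our numbers, not from the commit): with m_maxEpochs = 10, MPI worker 2 training epoch i = 3 gets dropoutRandSeedBase = 2 * 10 + 3 = 23. Since i < m_maxEpochs always holds, no other (worker, epoch) pair maps to the same base, and SetRandomSampleNodeSeed then spreads that base across its nodes as base * numNodes + nodeIdx, keeping every random stream distinct.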