This commit is contained in:
Thilo Will 2016-09-23 16:50:54 +02:00
Parent 658461ab5c
Commit 0db167f508
6 changed files with 57 additions and 52 deletions

View file

@@ -463,32 +463,30 @@ CNTK2 = [
CrossEntropyWithSoftmax_new (L, z, tag='') = Minus (ReduceLogSum (z), TransposeTimes (L, z), tag=tag)
ClassificationError_new (L, z, tag='') = Minus (BS.Constants.One, TransposeTimes (L, Hardmax (z)), tag=tag)
CrossEntropyWithSampledSoftmax(hiddenLayer /* Vector of dimension nHidden */,
                               labels /* One-hot vector for the true class. Dimension: nClasses */,
                               randomSampleSelector /* Sparse matrix of dimension nClasses * nSamples */,
                               weights /* nClasses * nHidden */,
                               bias /* Biases for logit computation. Dimension: nClasses */,
                               logInclusionProb /* Log of the inclusion probabilities (derived from the sampling weights) */
                              ) = [
    # Get the weight matrix wS in the subspace of samples. Dimension: nHidden * nSamples
    wS = TransposeTimes(weights, randomSampleSelector)
    zS = TransposeTimes(wS, hiddenLayer) + TransposeTimes(randomSampleSelector, bias - logInclusionProb)

    # Get the weight vector for the true label. Dimension: nHidden
    wT = TransposeTimes(weights, labels)
    zT = TransposeTimes(wT, hiddenLayer) + TransposeTimes(labels, bias - logInclusionProb)

    zSReduced = ReduceLogSum(zS)

    # The label (true class) might already be among the sampled classes. To get the
    # 'partition function' over the union of the label and the sampled classes, we
    # only need to LogPlus zT when the label is not among the sampled classes.
    labelIsInSampled = ReduceSum(TransposeTimes(labels, randomSampleSelector))
    logSum = BS.Boolean.If(labelIsInSampled, zSReduced, LogPlus(zT, zSReduced))
    ce = logSum - zT
].ce
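
For reference, the loss assembled above can be written compactly (a sketch in the comments' notation; S is the set of sampled classes, t the true class, q_j the inclusion probability of class j, h the hidden layer):

\[
z_j = w_j^\top h + b_j - \log q_j, \qquad
\mathrm{ce} = \log \sum_{j \in S \cup \{t\}} e^{z_j} \;-\; z_t
\]

Since $e^{z_j} = e^{w_j^\top h + b_j} / q_j$ and each class enters $S$ with probability $q_j$, we get $\mathbb{E}\big[\sum_{j \in S} e^{z_j}\big] = \sum_j e^{w_j^\top h + b_j}$, so the sampled sum estimates the full softmax partition function at a fraction of the cost.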
@@ -500,21 +498,21 @@ CNTK2 = [
NotEqual(_, y, tag='') = new ComputationNode [ operation = 'NotEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
// 13. Others
Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
Identity = Pass

GetRandomSample(_, numSamples, sampleWithReplacement, tag='') = new ComputationNode [
    operation = 'RandomSample' ;
    nSamples = numSamples;
    allowDuplicates = sampleWithReplacement;
    estimateInclusionProbs = false;
    inputs = _ /*plus the function args*/ ]

GetInclusionProb(_, numSamples, sampleWithReplacement, tag='') = new ComputationNode [
    operation = 'RandomSample' ;
    nSamples = numSamples;
    allowDuplicates = sampleWithReplacement;
    estimateInclusionProbs = true;
    inputs = _ /*plus the function args*/ ]
]
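
For intuition about the estimateInclusionProbs flag: when sampling with replacement, the probability that a class appears at least once in the sample has a closed form. A minimal standalone sketch of one way to derive those probabilities (hypothetical helper, not the RandomSample node's actual implementation):

#include <cmath>
#include <vector>

// Probability that class i is drawn at least once in nSamples independent
// weighted draws with replacement: 1 - (1 - w_i / W)^nSamples.
// These are the inclusion probabilities whose logs
// CrossEntropyWithSampledSoftmax subtracts from the logits; the
// without-replacement case lacks a simple closed form, which is presumably
// why the node exposes an "estimate" mode at all.
std::vector<double> InclusionProbsWithReplacement(const std::vector<double>& weights, size_t nSamples)
{
    double total = 0;
    for (double w : weights)
        total += w;
    std::vector<double> probs(weights.size());
    for (size_t i = 0; i < weights.size(); ++i)
        probs[i] = 1.0 - std::pow(1.0 - weights[i] / total, (double)nSamples);
    return probs;
}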

View file

@@ -553,19 +553,19 @@ template <class ElemType>
template <class ElemType>
/* static */ void ComputationNetwork::SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase)
{
    // BUGBUG: Code duplication with SetDropoutRate(...). Remove this by using SetDropoutRate for the dropout rate
    // and setting the random seeds for both RandomSampleNode and DropoutNode here.
    list<ComputationNodeBasePtr> randomSampleNodes = net->GetNodesWithType(OperationNameOf(RandomSampleNode), criterionNode);

    // Each RandomSampleNode gets a distinct seed. The actual seed for each node is computed as follows:
    // seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * randomSampleNodes.size()) + nodeIdx.
    size_t randSeed = randSeedBase * randomSampleNodes.size();
    for (auto& nodeIter : randomSampleNodes)
    {
        auto node = dynamic_pointer_cast<RandomSampleNode<ElemType>>(nodeIter);
        node->SetRandomSeed(randSeed);
        randSeed++;
    }
}
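
The seed scheme the comment describes can be sketched in isolation; a minimal standalone example (hypothetical helper, not CNTK code) showing that every (worker, epoch, node) combination gets a distinct, reproducible seed:

#include <cstddef>
#include <vector>

// Assign seed = randSeedBase * nodeCount + nodeIdx. Because randSeedBase
// already encodes (worker, epoch) as parallelWorkerIdx * maxEpochs + epoch,
// each worker/epoch pair owns a disjoint block of nodeCount seeds, so every
// (worker, epoch, node) triple maps to a distinct seed, and rerunning the
// same epoch on the same worker reproduces the same sampling streams.
std::vector<size_t> MakeNodeSeeds(size_t parallelWorkerIdx, size_t maxEpochs, size_t epoch, size_t nodeCount)
{
    size_t randSeedBase = parallelWorkerIdx * maxEpochs + epoch;
    std::vector<size_t> seeds(nodeCount);
    for (size_t nodeIdx = 0; nodeIdx < nodeCount; ++nodeIdx)
        seeds[nodeIdx] = randSeedBase * nodeCount + nodeIdx;
    return seeds;
}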

View file

@@ -448,10 +448,10 @@ public:
template <class ElemType>
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template <class ElemType>
static void SetRandomSampleNodeSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
template <class ElemType>
static void SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
double normalizationTimeConstant, double& prevNormalizationTimeConstant,
double blendTimeConstant, double& prevBlendTimeConstant);

View file

@@ -306,7 +306,7 @@ public:
return;
}
Value().SwitchToMatrixType(DENSE, MatrixFormat::matrixFormatDense, false);
// TensorView::DoMatrixProductOf() will reduce each tensor object into a 2D tensor (or fail if it cannot)
// and recreate actual Matrix objects (in case of sparse, they must be identical to the original tensor storage object).
@@ -314,8 +314,7 @@
auto input0 = OneSampleTensorFor(0, /*gradient=*/false, fr.AllowBroadcast());
auto input1 = OneSampleTensorFor(1, /*gradient=*/false, fr.AllowBroadcast());
auto output = OneSampleTensorFor(-1, /*gradient=*/false, fr);
output.AssignMatrixProductOf(false/*transC*/, input0, m_transpose/*transA*/, input1, false/*transB*/);
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
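
For TransposeTimes, m_transpose is true, so the call above computes output = input0^T * input1. A dense reference version of that product (hypothetical standalone code; CNTK routes this through TensorView and real Matrix objects):

#include <cstddef>
#include <vector>

// out(i,j) = sum_k a(k,i) * b(k,j), i.e. out = A^T * B, matching
// AssignMatrixProductOf(false /*transC*/, A, true /*transA*/, B, false /*transB*/).
// A is aRows x aCols, B is aRows x bCols, both column-major; out is aCols x bCols.
std::vector<double> TransposeTimesDense(const std::vector<double>& A, size_t aRows, size_t aCols,
                                        const std::vector<double>& B, size_t bCols)
{
    std::vector<double> out(aCols * bCols, 0.0);
    for (size_t j = 0; j < bCols; ++j)
        for (size_t i = 0; i < aCols; ++i)
            for (size_t k = 0; k < aRows; ++k)
                out[j * aCols + i] += A[i * aRows + k] * B[j * aRows + k];
    return out;
}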

View file

@@ -4429,20 +4429,28 @@ void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemT
{
    if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
    {
        if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
        {
            CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
            c.SetDataLocation(CPU, DENSE);
        }
        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
        {
            NOT_IMPLEMENTED;
        }
        else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * DENSE -> SPARSE
        {
            NOT_IMPLEMENTED;
        }
        else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * SPARSE -> SPARSE
        {
            NOT_IMPLEMENTED;
        }
        else
        {
            NOT_IMPLEMENTED;
        }
    }
    else // CPU, DENSE * ANY -> ANY
    {
        if (b.GetMatrixType() == MatrixType::SPARSE) // CPU, DENSE * SPARSE -> ANY
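
Only the SPARSE * DENSE -> DENSE branch is implemented on the CPU sparse path. For intuition about what CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd has to do there, a minimal standalone sketch of a CSC-format sparse-times-dense product with the same alpha/beta semantics (hypothetical code, not CNTK's implementation):

#include <cstddef>
#include <vector>

// C = alpha * A * B + beta * C, where A is (m x k) in compressed sparse column
// (CSC) form and B, C are dense column-major. Transpose handling is omitted
// for brevity; loop order favors clarity over cache behavior.
void SparseDenseMultiplyAndWeightedAdd(
    double alpha,
    size_t m, size_t k, size_t n,
    const std::vector<size_t>& colStarts,  // size k+1
    const std::vector<size_t>& rowIndices, // size nnz
    const std::vector<double>& values,     // size nnz
    const std::vector<double>& B,          // k x n, column-major
    double beta,
    std::vector<double>& C)                // m x n, column-major
{
    for (double& c : C)
        c *= beta;
    for (size_t col = 0; col < k; ++col)                       // column of A = row of B
        for (size_t p = colStarts[col]; p < colStarts[col + 1]; ++p)
            for (size_t j = 0; j < n; ++j)                     // column of B and C
                C[j * m + rowIndices[p]] += alpha * values[p] * B[j * k + col];
}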

View file

@@ -405,7 +405,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UsingParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
size_t dropoutRandSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropoutRandSeedBase);
ComputationNetwork::SetRandomSampleNodeSeed<ElemType>(net, criterionNodes[0], dropoutRandSeedBase);
ComputationNetwork::SetBatchNormalizationTimeConstants<ElemType>(net, criterionNodes[0],
m_batchNormalizationTimeConstant[i], prevNormalizationTimeConstant,
m_batchNormalizationBlendTimeConstant[i], prevNormalizationBlendTimeConstant);