Merge branch 'master' into jiajia/IRMetricV3

This commit is contained in:
Jian Jiao 2016-10-12 13:53:12 -07:00
Parent bf072cfc59 ed4a0ddac7
Commit cc4eab4627
25 changed files with 1094 additions and 549 deletions

4
.gitattributes vendored
View File

@@ -32,6 +32,7 @@ Readme text
*.pl text
*.ps1 text
*.ps text
*.i text
*.sh text eol=lf
build-and-test text eol=lf
@@ -57,6 +58,7 @@ Makefile text
*.vssettings text
*.csproj text
*.props text
*.asax text
*.h text
*.cpp text
@@ -81,6 +83,8 @@ Makefile text
*.log text
Dockerfile. text
# Speech data
mean.363 text
var.363 text

View File

@@ -289,13 +289,13 @@ void OutputFunctionInfo(FunctionPtr func)
auto inputVariables = func->Arguments();
fprintf(stderr, "Function %S: Input Variables (count=%lu)\n", func->Name().c_str(), inputVariables.size());
for_each(inputVariables.begin(), inputVariables.end(), [](const Variable v) {
fprintf(stderr, " name=%S, kind=%d\n", v.Name().c_str(), v.Kind());
fprintf(stderr, " name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
});
auto outputVariables = func->Outputs();
fprintf(stderr, "Function %S: Output Variables (count=%lu)\n", func->Name().c_str(), outputVariables.size());
for_each(outputVariables.begin(), outputVariables.end(), [](const Variable v) {
fprintf(stderr, " name=%S, kind=%d\n", v.Name().c_str(), v.Kind());
fprintf(stderr, " name=%S, kind=%d\n", v.Name().c_str(), static_cast<int>(v.Kind()));
});
}

View File

@@ -355,6 +355,7 @@ COMPUTATION_NETWORK_LIB_SRC =\
$(SOURCEDIR)/ComputationNetworkLib/ComputationNetworkEditing.cpp \
$(SOURCEDIR)/ComputationNetworkLib/ComputationNetworkBuilder.cpp \
$(SOURCEDIR)/ComputationNetworkLib/ComputationNetworkScripting.cpp \
$(SOURCEDIR)/ComputationNetworkLib/TrainingNodes.cpp \
SEQUENCE_TRAINING_LIB_SRC =\
$(SOURCEDIR)/SequenceTrainingLib/latticeforwardbackward.cpp \

View File

@@ -459,7 +459,7 @@ CNTK2 = [
if axis==0 then new ComputationNode [ operation = 'CrossEntropyWithSoftmax' ; inputs = _AsNodes (labelSequence : outProbVectorSequence) /*plus the function args*/ ]
else [ tag1 = tag; out = Minus (ReduceLogSum (outProbVectorSequence, axis=axis), ReduceSum (labelSequence .* outProbVectorSequence, axis=axis), tag=tag1) ].out
# Classification error along a specific axis: account only for missed labels, i.e.
# strictly check whether at the one “1” location in labels we find a value equal to the max
# strictly check whether at the one '1' location in labels we find a value equal to the max
ClassificationError(labelSequence, outVectorSequence, topN=1, axis=0, tag='') =
if axis==0 then new ComputationNode [ operation = 'ClassificationError' ; inputs = _AsNodes (if topN == 1 then (labelSequence : outVectorSequence) else (labelSequence : outVectorSequence : Constant (topN))) /*plus the function args*/ ]
else if topN != 1 then Fail ("ClassificationError() along a specific axis does not support topN.")
@@ -485,9 +485,26 @@ CNTK2 = [
NotEqual(_, y, tag='') = new ComputationNode [ operation = 'NotEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
LessEqual(_, y, tag='') = new ComputationNode [ operation = 'LessEqual' ; inputs = _AsNodes (_ : y) /*plus the function args*/ ]
// 13. Others
Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
// 13. Others
Pass(_, tag='') = new ComputationNode [ operation = 'Pass' ; inputs = _AsNodes (_) /*plus the function args*/ ]
Identity = Pass
// The value of GetRandomSample(weights /* vector of length nClasses */, numSamples, sampleWithReplacement) randomly draws numSamples samples using the specified sampling weights.
// The result is a sparse matrix with numSamples one-hot vectors as columns.
GetRandomSample(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
operation = 'RandomSample' ;
sizeOfSampledSet = numSamples;
allowDuplicates = sampleWithReplacement;
inputs = _ /*plus the function args*/ ]
// The value of GetInclusionFrequency(weights /* vector of length nClasses */, numSamples, sampleWithReplacement) has to be seen in conjunction with GetRandomSample(...).
// While GetRandomSample(...) creates a set of samples, GetInclusionFrequency(...) tells how often each class is expected to occur in the sampled sets.
// For sampling with replacement the relation to the sampling weights is trivial, but not for sampling without replacement.
GetInclusionFrequency(_ ,numSamples, sampleWithReplacement, tag='') = new ComputationNode [
operation = 'RandomSampleInclusionFrequency' ;
sizeOfSampledSet = numSamples;
allowDuplicates = sampleWithReplacement;
inputs = _ /*plus the function args*/ ]
]
# Parameter{} can do several forms of initialization.

View File

@@ -72,6 +72,7 @@ namespace CNTK
{
// The CNTK reader implementation requires that, for each deserializer, both the module and the deserializer type be specified.
// This is redundant; V2 API users will just specify the type, from which the module is automatically inferred.
// TODO: This should be done in the same manner for CNTK exe as well.
Dictionary augmentedConfiguration = configuration;
auto& deserializerConfigurations = augmentedConfiguration[L"deserializers"].Value<std::vector<DictionaryValue>>();
for (auto& deserializerConfig : deserializerConfigurations)
@@ -129,11 +130,14 @@ namespace CNTK
typedef Reader*(*CreateCompositeDataReaderProc)(const ConfigParameters* parameters);
CreateCompositeDataReaderProc createReaderProc = (CreateCompositeDataReaderProc)Plugin().Load(L"CompositeDataReader", "CreateCompositeDataReader");
m_compositeDataReader.reset(createReaderProc(&config));
std::shared_ptr<Microsoft::MSR::CNTK::Reader> compositeDataReader(createReaderProc(&config));
auto compositeDataReaderStreamDescs = m_compositeDataReader->GetStreamDescriptions();
for (auto streamDesc : compositeDataReaderStreamDescs)
m_compositeDataReaderStreamDescs = compositeDataReader->GetStreamDescriptions();
for (auto streamDesc : m_compositeDataReaderStreamDescs)
m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) });
m_shim = std::shared_ptr<ReaderShim<float>>(new ReaderShim<float>(compositeDataReader), [](ReaderShim<float>* x) { x->Destroy(); });
m_shim->Init(config);
}
/*virtual*/ const std::unordered_map<StreamInformation, MinibatchData>&
@@ -155,59 +159,70 @@ namespace CNTK
{
// TODO: Add support for distributed reading
EpochConfiguration epochConfig = { 1, 0, minibatchSizeInSamples, m_epochSize, 0, 0 };
m_matrices.clear();
std::map<std::wstring, int> requiredStreams;
std::unordered_set<InputStreamDescription> inputs;
for (const auto& s : m_streamInfos)
// Allocating all on CPU for now.
requiredStreams[s.m_name] = CPUDEVICE;
{
assert(s.m_storageFormat == StorageFormat::Dense || s.m_storageFormat == StorageFormat::SparseCSC);
auto inputStreamDescription = InputStreamDescription(
s.m_name,
AsCNTKImplDeviceId(device),
s.m_storageFormat == StorageFormat::Dense ? MatrixType::DENSE : MatrixType::SPARSE,
s.m_storageFormat == StorageFormat::Dense ? MatrixFormat::matrixFormatDense : MatrixFormat::matrixFormatSparseCSC);
inputs.insert(inputStreamDescription);
m_compositeDataReader->StartEpoch(epochConfig, requiredStreams);
if (s.m_elementType == DataType::Float)
{
auto iter = std::find_if(m_compositeDataReaderStreamDescs.begin(), m_compositeDataReaderStreamDescs.end(), [s](StreamDescriptionPtr& streamInfo) {
return streamInfo->m_id == s.m_id;
});
assert(iter != m_compositeDataReaderStreamDescs.end());
m_matrices.AddInput(
s.m_name,
std::make_shared<Matrix<float>>(0, 0, inputStreamDescription.GetDeviceId(), inputStreamDescription.GetMatrixType(), inputStreamDescription.GetMatrixFormat()),
std::make_shared<MBLayout>(),
*(*iter)->m_sampleLayout);
}
else
LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!");
}
m_shim->StartEpoch(epochConfig, inputs);
m_prevMinibatchSize = minibatchSizeInSamples;
}
if (minibatchSizeInSamples != m_prevMinibatchSize)
LogicError("GetNextMinibatch: Changing minibatch sizes across calls is currently unsupported");
auto compositeReaderMinibatchData = m_compositeDataReader->ReadMinibatch();
m_epochEndReached = compositeReaderMinibatchData.m_endOfEpoch;
auto compositeReaderMinibatchDataEmpty = m_shim->GetMinibatch(m_matrices);
m_epochEndReached = m_shim->IsEndOfEpoch();
auto& streamInfos = StreamInfos();
auto compositeDataReaderStreamDescs = m_compositeDataReader->GetStreamDescriptions();
size_t numStreams = compositeDataReaderStreamDescs.size();
for (size_t i = 0; i < numStreams; ++i)
for (const auto& s: m_streamInfos)
{
auto currentStreamDesc = compositeDataReaderStreamDescs[i];
auto iter = std::find_if(streamInfos.begin(), streamInfos.end(), [currentStreamDesc](const StreamInformation& streamInfo) {
return streamInfo.m_id == currentStreamDesc->m_id;
});
if (iter == streamInfos.end())
continue;
auto& currentStreamInfo = *iter;
auto sampleShape = AsNDShape(*(currentStreamDesc->m_sampleLayout));
auto input = m_matrices.GetInput(s.m_name);
auto& currentStreamInfo = s;
ValuePtr minibatchValuePtr;
if (compositeReaderMinibatchData.m_data.empty())
if (!compositeReaderMinibatchDataEmpty)
{
minibatchValuePtr = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(currentStreamInfo.m_elementType, sampleShape.AppendShape({ 0, 0 }), DeviceDescriptor::CPUDevice()));
minibatchValuePtr = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(currentStreamInfo.m_elementType, s.m_sampleLayout.AppendShape({ 0, 0 }), DeviceDescriptor::CPUDevice()));
continue;
}
auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i];
if (currentStreamDesc->m_elementType == ElementType::tfloat)
if (s.m_elementType == DataType::Float)
{
auto CNTKMatrixType = (currentStreamDesc->m_storageType == StorageType::dense) ? DENSE : SPARSE;
auto CNTKMatrixFormat = (currentStreamDesc->m_storageType == StorageType::dense) ? matrixFormatDense : matrixFormatSparseCSC;
auto dataMatrix = std::make_shared<Matrix<float>>(0, 0, CPUDEVICE, CNTKMatrixType, CNTKMatrixFormat);
size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements();
auto matrixType = (s.m_storageFormat == StorageFormat::Dense) ? DENSE : SPARSE;
auto matrixFormat = (s.m_storageFormat == StorageFormat::Dense) ? matrixFormatDense : matrixFormatSparseCSC;
// Can we reuse this, not allocating it each time?
auto dataMatrix = std::make_shared<Matrix<float>>(0, 0, input.GetMatrix<float>().GetDeviceId(), matrixType, matrixFormat);
// TODO: Eliminate the unnecessary CPU to CPU copy
ReaderShim<float>::FillMatrixFromStream(currentStreamDesc->m_storageType, dataMatrix.get(), sampleSize, currentStreamMinibatchData, nullptr);
minibatchValuePtr = MakeSharedObject<PackedValue>(sampleShape, dataMatrix, currentStreamMinibatchData->m_layout, /*readOnly =*/ false);
std::swap(*dataMatrix, input.GetMatrix<float>());
minibatchValuePtr = MakeSharedObject<PackedValue>(s.m_sampleLayout, dataMatrix, input.pMBLayout, /*readOnly =*/ false);
size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();
size_t numSamples = input.pMBLayout->GetActualNumSamples();
size_t numSequences = input.pMBLayout->GetNumSequences();
m_minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
}
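The std::swap(*dataMatrix, input.GetMatrix<float>()) call above hands the prefetched buffer over to the minibatch value without copying its elements. A minimal sketch of the same handoff, assuming a hypothetical DenseBuffer type in place of CNTK's Matrix<float>:

#include <utility>
#include <vector>

// Hypothetical stand-in for Matrix<float>; only what the handoff needs.
struct DenseBuffer
{
    std::vector<float> data;
};

int main()
{
    DenseBuffer prefetched;             // filled by the reader shim
    prefetched.data.assign(1024, 1.0f);

    DenseBuffer minibatch;              // empty target owned by the minibatch value

    // std::swap exchanges the underlying storage via moves instead of copying
    // the 1024 floats; afterwards 'minibatch' owns the prefetched data and
    // 'prefetched' is empty, ready to receive the next read.
    std::swap(prefetched, minibatch);
    return minibatch.data.size() == 1024 ? 0 : 1;
}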

View File

@@ -9,6 +9,7 @@
#include "CNTKLibrary.h"
#include "Utils.h"
#include "Reader.h"
#include "ReaderShim.h"
namespace CNTK
{
@@ -25,10 +26,17 @@ namespace CNTK
private:
std::unordered_set<StreamInformation> m_streamInfos;
std::shared_ptr<Microsoft::MSR::CNTK::Reader> m_compositeDataReader;
bool m_epochEndReached;
size_t m_prevMinibatchSize;
size_t m_epochSize;
std::unordered_map<StreamInformation, MinibatchData> m_minibatchData;
std::vector<Microsoft::MSR::CNTK::StreamDescriptionPtr> m_compositeDataReaderStreamDescs;
// For now we reuse the shim to allow prefetching.
// Please use only the subset of the shim interface consisting of
// Init()/StartEpoch()/GetMinibatch()/IsEndOfEpoch().
// The shim will be removed in future versions.
std::shared_ptr<Microsoft::MSR::CNTK::ReaderShim<float>> m_shim;
Microsoft::MSR::CNTK::StreamMinibatchInputs m_matrices;
};
}

View File

@@ -9,6 +9,7 @@
#include "Platform.h"
#include "ExceptionWithCallStack.h"
#include <cmath>
#include <string>
#include <vector>
#include <assert.h>
@@ -25,7 +26,7 @@
#define TWO_PI 6.283185307f // TODO: find the official standards-confirming definition of this and use it instead
#define EPSILON 1e-5
#define ISCLOSE(a, b, threshold) (abs(a - b) < threshold) ? true : false
#define ISCLOSE(a, b, threshold) (std::abs(a - b) < threshold) ? true : false
#define DLCLOSE_SUCCESS 0
#define UNUSED(x) (void)(x) // for variables that are, e.g., only used in _DEBUG builds
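The switch to std::abs above matters because the C library's ::abs takes an int: with a plain abs(a - b) on floating-point arguments, the difference can be silently truncated to an integer, depending on which headers and using-declarations are in scope. A minimal sketch of the fixed macro in use:

#include <cmath>
#include <cstdio>

// Same shape as the macro above; std::abs always selects the floating-point overload.
#define ISCLOSE(a, b, threshold) (std::abs((a) - (b)) < (threshold))

int main()
{
    // 0.1 + 0.2 != 0.3 exactly in binary floating point, but it is "close".
    std::printf("%d\n", ISCLOSE(0.1 + 0.2, 0.3, 1e-5) ? 1 : 0); // prints 1
    return 0;
}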

View File

@@ -524,7 +524,7 @@ void ComputationNetwork::CollectInputAndLearnableParametersRec(const Computation
}
template <class ElemType>
/*static*/ void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase)
/*static*/ void ComputationNetwork::SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate)
{
list<ComputationNodeBasePtr> dropoutNodes = net->GetNodesWithType(OperationNameOf(DropoutNode), criterionNode);
if (dropoutRate != prevDropoutRate)
@@ -535,21 +535,35 @@ template <class ElemType>
fprintf(stderr, "WARNING: Attempting to set dropout rate, but there is no dropout node in the network.\n");
}
// Each dropout node gets a distinct seed. The actual seed for each dropout node is computed as follows:
// seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * dropoutNodes.size()) + dropoutNodeIdx
size_t randSeed = randSeedBase * dropoutNodes.size();
for (auto& nodeIter : dropoutNodes)
{
auto node = dynamic_pointer_cast<DropoutNode<ElemType>>(nodeIter);
if (dropoutRate != prevDropoutRate)
node->SetDropoutRate(dropoutRate);
node->SetRandomSeed(randSeed);
randSeed++;
}
prevDropoutRate = dropoutRate;
}
template <class ElemType>
/* static */ void ComputationNetwork::SetIRngUserSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& node, size_t randSeedBase)
{
// Predicate checking if the node is derived from IRngUser
function<bool(const ComputationNodeBasePtr&)> nodeIsIRngUser = [](const ComputationNodeBasePtr& p) { return dynamic_pointer_cast<IRngUser>(p) != nullptr; };
list<ComputationNodeBasePtr> rngUserNodes = net->GetNodesWhere(nodeIsIRngUser, node);
// Each IRngUser gets a distinct seed. This seed is computed as follows:
// seed = (((parallelWorkerIdx * maxEpochs) + currentEpochNum) /*i.e. randSeedBase*/ * rngUserNodes.size()) + rngUserNodeIdx.
size_t randSeed = randSeedBase * rngUserNodes.size();
for (auto& nodeIter : rngUserNodes)
{
auto rngUser = dynamic_pointer_cast<IRngUser>(nodeIter);
rngUser->SetRandomSeed(randSeed);
randSeed++;
}
}
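The seed layout above guarantees that no two RNG-using nodes, epochs, or parallel workers share a seed, provided each worker has a distinct parallelWorkerIdx. A small sketch of the arithmetic, with made-up values for illustration:

#include <cstddef>
#include <cstdio>

int main()
{
    // Hypothetical values: worker 1 in a job with 10 epochs, currently in epoch 3,
    // training a network that contains 2 RNG-using nodes.
    const size_t parallelWorkerIdx = 1, maxEpochs = 10, currentEpochNum = 3, numRngNodes = 2;

    const size_t randSeedBase = (parallelWorkerIdx * maxEpochs) + currentEpochNum; // 13
    size_t randSeed = randSeedBase * numRngNodes;                                  // 26

    // Each node gets the next consecutive seed: 26, 27.
    for (size_t nodeIdx = 0; nodeIdx < numRngNodes; nodeIdx++)
        std::printf("node %zu -> seed %zu\n", nodeIdx, randSeed++);
    return 0;
}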
template <class ElemType>
/*static*/ void ComputationNetwork::SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
double normalizationTimeConstant, double& prevNormalizationTimeConstant,
@@ -1490,7 +1504,8 @@ template void ComputationNetwork::InitLearnableParametersWithBilinearFill<float>
template void ComputationNetwork::Read<float>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<float>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<float>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/ void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetDropoutRate<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate);
template /*static*/ void ComputationNetwork::SetIRngUserSeed<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetBatchNormalizationTimeConstants<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double normalizationTimeConstant, double& prevNormalizationTimeConstant, double blendTimeConstant, double& prevBlendTimeConstant);
template void ComputationNetwork::SetSeqParam<float>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);
@@ -1500,7 +1515,8 @@ template void ComputationNetwork::InitLearnableParametersWithBilinearFill<double
template void ComputationNetwork::Read<double>(const wstring& fileName);
template void ComputationNetwork::ReadPersistableParameters<double>(File& fstream, bool create);
template void ComputationNetwork::PerformSVDecomposition<double>(const map<wstring, float>& SVDConfig, size_t alignedsize);
template /*static*/ void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetDropoutRate<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate);
template /*static*/ void ComputationNetwork::SetIRngUserSeed<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
template /*static*/ void ComputationNetwork::SetBatchNormalizationTimeConstants<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double normalizationTimeConstant, double& prevNormalizationTimeConstant, double blendTimeConstant, double& prevBlendTimeConstant);
template void ComputationNetwork::SetSeqParam<double>(ComputationNetworkPtr net, const ComputationNodeBasePtr criterionNode, const double& hsmoothingWeight, const double& frameDropThresh, const bool& doreferencealign,
const double& amf, const double& lmf, const double& wp, const double& bMMIfactor, const bool& sMBR);

View File

@@ -446,8 +446,11 @@ public:
// TODO: Why are all these static, but then take a network as the first argument? --> make them class members
template <class ElemType>
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate, size_t randSeedBase);
static void SetDropoutRate(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, const double dropoutRate, double& prevDropoutRate);
template <class ElemType>
static void SetIRngUserSeed(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode, size_t randSeedBase);
template <class ElemType>
static void SetBatchNormalizationTimeConstants(ComputationNetworkPtr net, const ComputationNodeBasePtr& criterionNode,
double normalizationTimeConstant, double& prevNormalizationTimeConstant,
@@ -652,18 +655,19 @@ public:
return std::vector<ComputationNodeBasePtr>(outputNodes.begin(), outputNodes.end());
}
std::list<ComputationNodeBasePtr> GetNodesWithType(const wstring typeName, const ComputationNodeBasePtr& rootNode = nullptr)
std::list<ComputationNodeBasePtr> GetNodesWhere(std::function<bool(const ComputationNodeBasePtr&)>& predicate, const ComputationNodeBasePtr& rootNode = nullptr) const
{
std::list<ComputationNodeBasePtr> nodesWithType;
std::list<ComputationNodeBasePtr> filteredNodes;
// find nodes from all available nodes
// TODO: This distinction should not be necessary anymore. Calling GetEvalOrder(nullptr) will have the same effect.
if (rootNode == nullptr)
{
for (auto nodeIter = m_nameToNodeMap.begin(); nodeIter != m_nameToNodeMap.end(); nodeIter++)
{
ComputationNodeBasePtr node = nodeIter->second;
if (node->OperationName() == typeName)
nodesWithType.push_back(node);
if (predicate(node))
filteredNodes.push_back(node);
}
}
else
@@ -671,12 +675,18 @@ public:
// for calculating a specific node
for (const auto& node : GetEvalOrder(rootNode)) // TODO: verify that no use of this requires the actual eval order, then change to GetAllNodesForRoot()
{
if (node->OperationName() == typeName)
nodesWithType.push_back(node);
if (predicate(node))
filteredNodes.push_back(node);
}
}
return nodesWithType;
return filteredNodes;
}
std::list<ComputationNodeBasePtr> GetNodesWithType(const wstring typeName, const ComputationNodeBasePtr& rootNode = nullptr) const
{
std::function<bool(const ComputationNodeBasePtr&)> predicate = [typeName](const ComputationNodeBasePtr& node) { return node->OperationName() == typeName; };
return GetNodesWhere(predicate, rootNode);
}
public:

View File

@@ -91,6 +91,8 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
else if (nodeType == OperationNameOf(PerDimMeanVarDeNormalizationNode)) return New<PerDimMeanVarDeNormalizationNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PassNode)) return New<PassNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(PlusNode)) return New<PlusNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RandomSampleNode)) return New<RandomSampleNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RandomSampleInclusionFrequencyNode)) return New<RandomSampleInclusionFrequencyNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReconcileDynamicAxisNode)) return New<ReconcileDynamicAxisNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(ReciprocalNode)) return New<ReciprocalNode<ElemType>>(forward<_Types>(_Args)...);
else if (nodeType == OperationNameOf(RectifiedLinearNode)) return New<RectifiedLinearNode<ElemType>>(forward<_Types>(_Args)...);
@@ -812,6 +814,18 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RowSt
return net.AddNodeToNetAndAttachInputs(New<RowStackNode<ElemType>>(net.GetDeviceId(), nodeName), { inputs });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RandomSample(const ComputationNodePtr a, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<RandomSampleNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::RandomSampleInclusionFrequency(const ComputationNodePtr a, const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<RandomSampleInclusionFrequencyNode<ElemType>>(net.GetDeviceId(), nodeName), { a });
}
#ifdef COMING_SOON
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::GMMLogLikelihood(const ComputationNodePtr unnormedPrior,

View File

@@ -157,6 +157,8 @@ public:
ComputationNodePtr PerDimMeanVarNormalization(const ComputationNodePtr feature, const ComputationNodePtr mean, const ComputationNodePtr InvStdDev, const std::wstring nodeName = L"");
ComputationNodePtr Plus(const ComputationNodePtr a, const ComputationNodePtr b, const std::wstring nodeName = L"");
ComputationNodePtr Reciprocal(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr RandomSample(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr RandomSampleInclusionFrequency(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr RectifiedLinear(const ComputationNodePtr a, const std::wstring nodeName = L"");
ComputationNodePtr Reshape(const ComputationNodePtr a, const TensorShape& imageLayout, const std::wstring nodeName = L"");
ComputationNodePtr RowRepeat(const ComputationNodePtr a, const size_t num_repeat, const std::wstring nodeName = L"");

View File

@@ -127,6 +127,7 @@
<ClCompile Include="RNNNodes.cpp" />
<ClCompile Include="SpecialPurposeNodes.cpp" />
<ClCompile Include="stdafx.cpp" />
<ClCompile Include="TrainingNodes.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />

View File

@@ -46,6 +46,9 @@
<ClCompile Include="RecurrentNodes.cpp">
<Filter>Nodes</Filter>
</ClCompile>
<ClCompile Include="TrainingNodes.cpp">
<Filter>Nodes</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\Common\Include\fileutil.h">

View File

@@ -0,0 +1,225 @@
//
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
//
#include "TrainingNodes.h"
namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void RandomSampleNodeBase<ElemType>::CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const
{
Base::CopyTo(nodeP, newName, flags);
if (flags & CopyNodeFlags::copyNodeValue)
{
auto node = dynamic_pointer_cast<RandomSampleNodeBase<ElemType>>(nodeP);
node->m_allowDuplicates = m_allowDuplicates;
node->m_sizeOfSampledSet = m_sizeOfSampledSet;
node->m_randomSeed = m_randomSeed;
}
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::Save(File& fstream) const
{
Base::Save(fstream);
fstream << m_allowDuplicates;
fstream << m_sizeOfSampledSet;
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::Load(File& fstream, size_t modelVersion)
{
Base::Load(fstream, modelVersion);
fstream >> m_allowDuplicates;
fstream >> m_sizeOfSampledSet;
}
template<class ElemType>
void RandomSampleNodeBase<ElemType>::UpdateWeightsPrefixSum()
{
const Matrix<ElemType>& samplingWeights = Input(0)->ValueAsMatrix();
m_samplingWeightsPrefixSum.clear();
double runningWeightsSum = 0;
for (int iClass = 0; iClass < samplingWeights.GetNumRows(); iClass++)
{
ElemType currentWeight = samplingWeights.GetValue(iClass, 0);
runningWeightsSum += currentWeight;
m_samplingWeightsPrefixSum.push_back(runningWeightsSum);
}
}
// Runs the sampling, returning a vector with the ids of the samples. The parameter nTries is used to return the number of draws that were needed
// to get the expected number of samples.
template<class ElemType>
const std::vector<size_t> RandomSampleNodeBase<ElemType>::RunSampling(long& nTries)
{
std::uniform_real_distribution<double> r(0, m_samplingWeightsPrefixSum.back());
std::unordered_set<int> alreadySampled;
std::vector<size_t> samples;
CPURNGHandle* cpuRNGHandle = dynamic_cast<CPURNGHandle*>(&GetRNGHandle(CPUDEVICE));
// find random samples using the specified weight
if (m_allowDuplicates)
nTries = m_sizeOfSampledSet;
else
nTries = 0; // just initialize and count how many tries we need.
while (samples.size() < m_sizeOfSampledSet)
{
double randomValue = r(cpuRNGHandle->Generator());
// Find the first index where value[idx] >= randomValue.
auto lower = std::lower_bound(m_samplingWeightsPrefixSum.begin(), m_samplingWeightsPrefixSum.end(), randomValue);
int idx = (int)(lower - m_samplingWeightsPrefixSum.begin());
if (m_allowDuplicates)
samples.push_back(idx);
else
{
// Sampling without replacement: each value can be sampled at most once.
// The implementation below using rejection sampling is problematic.
// E.g if first class has probability p = 0.999 we typically will have to sample 1000 times or more to hit another class.
// BUGBUG: Alternative implementations, e.g.:
// * Weighted Random Sampling with Reservoir: http://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf
// * Binary tree with classes as leaves and branch probabilities on non-leaves.
// * As in numpy: https://github.com/numpy/numpy/blob/master/numpy/random/mtrand/mtrand.pyx#L1440
nTries++;
if (alreadySampled.find(idx) != alreadySampled.end()) continue;
else
{
samples.push_back(idx);
alreadySampled.insert(idx);
}
}
}
return samples;
}
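RunSampling above inverts the prefix sum of the weights to draw from the categorical distribution: a uniform value in [0, totalWeight) is mapped to a class index via std::lower_bound, and sampling without replacement simply rejects repeated indices. A self-contained sketch of the same technique, independent of the CNTK node classes:

#include <algorithm>
#include <cstdio>
#include <random>
#include <unordered_set>
#include <vector>

// Draws 'count' class indices with probability proportional to 'weights'.
// With allowDuplicates == false, repeated draws are rejected until 'count'
// distinct classes are collected, mirroring the rejection scheme above
// (so 'count' must not exceed the number of classes in that mode).
std::vector<size_t> WeightedSample(const std::vector<double>& weights, size_t count,
                                   bool allowDuplicates, std::mt19937& rng)
{
    std::vector<double> prefixSum; // prefixSum[i] = w_0 + ... + w_i
    double running = 0;
    for (double w : weights)
        prefixSum.push_back(running += w);

    std::uniform_real_distribution<double> r(0, prefixSum.back());
    std::unordered_set<size_t> alreadySampled;
    std::vector<size_t> samples;
    while (samples.size() < count)
    {
        // Find the first index whose prefix sum is >= the random value.
        auto lower = std::lower_bound(prefixSum.begin(), prefixSum.end(), r(rng));
        size_t idx = static_cast<size_t>(lower - prefixSum.begin());
        if (allowDuplicates || alreadySampled.insert(idx).second)
            samples.push_back(idx);
    }
    return samples;
}

int main()
{
    std::mt19937 rng(42);
    for (size_t idx : WeightedSample({ 0.5, 0.3, 0.2 }, 2, /*allowDuplicates =*/ false, rng))
        std::printf("%zu\n", idx);
    return 0;
}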
template<class ElemType>
void RandomSampleNode<ElemType>::ForwardPropNonLooping()
{
Base::UpdateWeightsPrefixSum();
Matrix<ElemType>& valueMatrix = ValueAsMatrix();
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
valueMatrix.SetDevice(CPUDEVICE);
//BUGBUG: matrix type should be configured during validation
valueMatrix.SwitchToMatrixType(SPARSE, matrixFormatSparseCSC, false);
valueMatrix.Reset();
// Get vector with indices of randomly sampled classes
const std::vector<size_t> samples = GetWeightedSamples();
// Set columns of (sparse) result matrix as indicator vectors
for (size_t i = 0; i < Base::m_sizeOfSampledSet; i++)
{
int sample = samples[i];
valueMatrix.SetValue(sample, i, 1);
}
}
template<class ElemType>
const std::vector<size_t> RandomSampleNode<ElemType>::GetWeightedSamples()
{
long dummy;
// Here we are not interested in the number of sampling tries needed, which is returned in the parameter.
return Base::RunSampling(dummy);
}
template<class ElemType>
void RandomSampleNode<ElemType>::Validate(bool isFinalValidationPass)
{
Base::Validate(isFinalValidationPass);
m_pMBLayout = nullptr;
let& shape = Input(0)->GetSampleLayout();
let dims = shape.GetDims();
size_t nClasses = dims[0];
// Output: a (sparse) matrix containing m_sizeOfSampledSet columns of 1-hot vectors specifying the sampled classes.
SetDims(TensorShape(nClasses, Base::m_sizeOfSampledSet), false);
}
template<class ElemType>
bool RandomSampleNode<ElemType>::IsOutOfDateWrtInputs() const
{
// If we are in the mode to generate random samples (i.e. m_estimateInSampleFrequency == false)
// we need to recompute the result for each mini-batch even if the weight vector didn't change.
return true;
}
template<class ElemType>
double RandomSampleInclusionFrequencyNode<ElemType>::EstimateNumberOfTries()
{
// We estimate the average number of tries by repeating a fixed number of experiments.
const size_t numExperiments = 10; // We choose 10 without any deep justification.
long totalTries = 0;
for (int iExperiment = 0; iExperiment < numExperiments; iExperiment++)
{
long nTries;
Base::RunSampling(nTries);
totalTries += nTries;
}
return totalTries / (double)numExperiments;
}
// Estimates the expected number of occurrences of each class in the sampled set.
// For sampling without replacement we estimate using the average number of tries (inspired by TensorFlow).
// BUGBUG: Consider reimplementing using a less biased estimate as proposed by Nikos.
template<class ElemType>
double RandomSampleInclusionFrequencyNode<ElemType>::EstimateInSampleFrequency(double p, double estimatedNumTries) const
{
if (Base::m_allowDuplicates)
{
return p * Base::m_sizeOfSampledSet;
}
else /* No duplicates allowed. The estimated count equals the probability of inclusion. */
{
return -expm1(estimatedNumTries * log1p(-p));
}
}
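The -expm1(estimatedNumTries * log1p(-p)) expression above is a numerically stable form of the inclusion probability. A short derivation, under the same simplifying assumption the node makes, namely that each of the n = estimatedNumTries draws independently picks the class with probability p:

$$
P(\text{class in sampled set}) = 1 - (1-p)^{n} = 1 - e^{\,n \ln(1-p)} = -\operatorname{expm1}\bigl(n \cdot \operatorname{log1p}(-p)\bigr).
$$

For small p, log1p(-p) avoids the cancellation in forming 1 - p, and expm1 avoids the cancellation in 1 - e^x when the exponent is near zero.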
template<class ElemType>
void RandomSampleInclusionFrequencyNode<ElemType>::ForwardPropNonLooping()
{
Base::UpdateWeightsPrefixSum();
Matrix<ElemType>& valueMatrix = ValueAsMatrix();
valueMatrix.TransferToDeviceIfNotThere(CPUDEVICE, /*ismoved =*/ true/*means: BOTH state not ok */, /*emptyTransfer =*/ true, /*updatePreferredDevice =*/ false);
valueMatrix.SetDevice(CPUDEVICE);
//BUGBUG: matrix type should be configured during validation
valueMatrix.SwitchToMatrixType(DENSE, matrixFormatDense, false);
double sumOfWeights = Base::m_samplingWeightsPrefixSum.back();
const Matrix<ElemType>& samplingWeights = Input(0)->ValueAsMatrix();
double estimatedNumTries = EstimateNumberOfTries();
for (int i = 0; i < Base::m_samplingWeightsPrefixSum.size(); i++)
{
// Get the sampling probability from the weights for the i-th class.
double samplingProb = samplingWeights.GetValue(i, 0) / sumOfWeights;
double estimatedCount = EstimateInSampleFrequency(samplingProb, estimatedNumTries);
valueMatrix.SetValue(i, 0, (ElemType)estimatedCount);
}
}
template<class ElemType>
void RandomSampleInclusionFrequencyNode<ElemType>::Validate(bool isFinalValidationPass)
{
Base::Validate(isFinalValidationPass);
m_pMBLayout = nullptr;
let& shape = Input(0)->GetSampleLayout();
let dims = shape.GetDims();
size_t nClasses = dims[0];
// Output: one vector containing the estimated in-sample frequency for each class.
SetDims(TensorShape(nClasses, 1), false);
}
template class RandomSampleNode<float>;
template class RandomSampleNode<double>;
template class RandomSampleInclusionFrequencyNode<float>;
template class RandomSampleInclusionFrequencyNode<double>;
}}}

View File

@@ -8,6 +8,8 @@
#include "ComputationNode.h"
#include "BatchNormalizationEngine.h"
#include "RNGHandle.h"
#include "CPURNGHandle.h"
#define __STDC_FORMAT_MACROS
#include <inttypes.h>
@@ -17,6 +19,7 @@
#include <stdexcept>
#include <list>
#include <memory>
#include <random>
namespace Microsoft { namespace MSR { namespace CNTK {
@@ -1141,6 +1144,179 @@ private:
template class NoiseContrastiveEstimationNode<float>;
template class NoiseContrastiveEstimationNode<double>;
// Nodes using random number generators should derive from this interface.
// One purpose of this interface is to provide a common way of setting the seeds when setting up a network.
class IRngUser
{
public:
virtual RNGHandle& GetRNGHandle(DEVICEID_TYPE deviceId) = 0;
virtual void SetRandomSeed(const unsigned long val) = 0;
};
// This implements IRngUser using RNGHandle.
class RngUser : public IRngUser
{
public:
RNGHandle& GetRNGHandle(DEVICEID_TYPE deviceId) override
{
if (!m_RNGHandle)
m_RNGHandle = RNGHandle::Create(deviceId, m_randomSeed);
return *m_RNGHandle;
}
// E.g. called from ComputationNetwork to make sure that CNTK instances running on different nodes will have different seeds.
void SetRandomSeed(const unsigned long val) override
{
m_randomSeed = (unsigned long)val;
m_RNGHandle.reset(); // Reset handle. New handle will be generated with next call of GetRNGHandle(...).
}
protected:
unsigned long m_randomSeed = 0;
std::shared_ptr<RNGHandle> m_RNGHandle;
};
// ------------------------------------------------------------------------------------------------------------------------------------------------
// RandomSampleNodeBase(samplingWeights, sizeOfSampledSet, allowDuplicates):
// Base class for RandomSampleNode and RandomSampleInclusionFrequencyNode.
// Provides random sampling functionality.
//
// Parameters:
// * Input(0) Sampling weight vector: Matrix of shape (nClasses x 1) providing sampling weights >= 0.
// * sizeOfSampledSet: Size of the sampled set.
// * allowDuplicates: controls if sampled set is allowed to contain duplicates.
// --------------------------------------------------------------------------------------------------------------------------------------------------
template <class ElemType>
class RandomSampleNodeBase : public ComputationNodeNonLooping<ElemType>, public NumInputs<1>, public RngUser
{
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName(){return L"RandomSampleNodeBase";}
public:
RandomSampleNodeBase(DEVICEID_TYPE deviceId, const wstring& name, int sizeOfSampledSet = 0, bool allowDuplicates = false)
: Base(deviceId, name), m_sizeOfSampledSet(sizeOfSampledSet), m_allowDuplicates(allowDuplicates)
{
SetRandomSeed((unsigned long)CreateUniqId());
}
RandomSampleNodeBase(const ScriptableObjects::IConfigRecordPtr configp)
: RandomSampleNodeBase(CPUDEVICE, L"<placeholder>", configp->Get(L"sizeOfSampledSet"), configp->Get(L"allowDuplicates"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override;
void Save(File& fstream) const;
virtual void Load(File& fstream, size_t modelVersion) override;
protected:
void UpdateWeightsPrefixSum();
// Runs the sampling, returning a vector with the ids of the samples. The parameter nTries is used to return the number of draws that were needed
// to get the expected number of samples.
const std::vector<size_t> RunSampling(long& nTries);
public:
virtual void /*ComputationNode::*/ BackpropToNonLooping(size_t inputIndex) override {
// This node does not propagate gradients.
}
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override{}
virtual bool OutputUsedInComputingInputNodesGradients() const override { return false; }
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false;}
virtual void /*ComputationNode::*/ ForwardPropNonLooping() override{}
protected:
bool m_allowDuplicates; // The node can create samples allowing for duplicates (sampling with replacement) or not (sampling without replacement).
int m_sizeOfSampledSet; // Requested size of sample in case of run-mode = CREATE_SAMPLES.
std::vector<double> m_samplingWeightsPrefixSum;
};
// ------------------------------------------------------------------------------------------------------------------------------------------------
// RandomSampleNode(samplingWeights, sizeOfSampledSet, allowDuplicates):
// The node's value is a set of sizeOfSampledSet random samples represented as a (sparse) matrix of shape [nClasses x sizeOfSampledSet] where nClasses is the number of classes (categories) to choose from.
// The output has no dynamic axis.
// The samples are drawn according to the weight vector p(w_i) = w_i / sum_k(w_k)
// We get one set of samples per minibatch.
// Intended uses are e.g. sampled softmax, noise contrastive estimation, etc.
//
// Parameters:
// * Input(0): Sampling weight vector. Matrix of shape (nClasses x 1) providing sampling weights >= 0.
// * sizeOfSampledSet: Size of the sampled set.
// * allowDuplicates: controls if sampled set is allowed to contain duplicates.
// --------------------------------------------------------------------------------------------------------------------------------------------------
template<class ElemType>
class RandomSampleNode : public RandomSampleNodeBase<ElemType>
{
typedef RandomSampleNodeBase<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName(){ return L"RandomSample"; }
public:
RandomSampleNode(DEVICEID_TYPE deviceId, const wstring& name, int sizeOfSampledSet = 0, bool allowDuplicates = false)
: Base(deviceId, name, sizeOfSampledSet, allowDuplicates)
{}
RandomSampleNode(const ScriptableObjects::IConfigRecordPtr configp)
: RandomSampleNode(CPUDEVICE, L"<placeholder>", configp->Get(L"sizeOfSampledSet"), configp->Get(L"allowDuplicates"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
virtual void /*ComputationNode::*/ ForwardPropNonLooping() override;
const std::vector<size_t> GetWeightedSamples();
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
virtual bool IsOutOfDateWrtInputs() const override;
};
// ------------------------------------------------------------------------------------------------------------------------------------------------
// RandomSampleInclusionFrequencyNode(samplingWeights, sizeOfSampledSet, allowDuplicates):
// Intended uses are e.g. sampled softmax, noise contrastive estimation, etc., where it is used together with RandomSampleNode.
// This node estimates how often each class will occur, on average, in a set sampled with RandomSampleNode(...).
// If the sampling mode 'allowDuplicates = true' is chosen, this is trivial and exact.
// For allowDuplicates = false we get some estimate. The value is updated only when the input weights change.
//
// Parameters:
// * Input(0): Sampling weight vector. Matrix of shape (nClasses x 1) providing sampling weights >= 0.
// * sizeOfSampledSet: Size of the sampled set.
// * allowDuplicates: controls if sampled set is allowed to contain duplicates.
// --------------------------------------------------------------------------------------------------------------------------------------------------
template<class ElemType>
class RandomSampleInclusionFrequencyNode : public RandomSampleNodeBase<ElemType>
{
typedef RandomSampleNodeBase<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName(){ return L"RandomSampleInclusionFrequency"; }
public:
RandomSampleInclusionFrequencyNode(DEVICEID_TYPE deviceId, const wstring& name, int sizeOfSampledSet = 0, bool allowDuplicates = false)
: Base(deviceId, name, sizeOfSampledSet, allowDuplicates)
{}
RandomSampleInclusionFrequencyNode(const ScriptableObjects::IConfigRecordPtr configp)
: RandomSampleInclusionFrequencyNode(CPUDEVICE, L"<placeholder>", configp->Get(L"sizeOfSampledSet"), configp->Get(L"allowDuplicates"))
{
AttachInputsFromConfig(configp, this->GetExpectedNumInputs());
}
virtual void /*ComputationNode::*/ ForwardPropNonLooping() override;
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
private:
// Approximates the expected number of occurrences of a class in the sampled set.
// Assuming (falsely) that the number of tries to get a sampled set with the requested number of distinct values is always estimatedNumTries,
// the probability that a specific class is in the sampled set is (1 - (1-p)^estimatedNumTries), where p is the probability of picking the class in one draw.
// The estimate can be quite a bit off but should be better than nothing. Better alternatives?
double EstimateInSampleFrequency(double p, double estimatedNumTries) const;
double EstimateNumberOfTries();
};
// -----------------------------------------------------------------------
// ClassBasedCrossEntropyWithSoftmaxNode (labeldata(.,t), inputdata(.,t), embeddingMatrix, clsProbBeforeSoftmaxData(.,t))
// - Input(0) [4 x T] label in dense matrix in
@@ -1152,7 +1328,6 @@ template class NoiseContrastiveEstimationNode<double>;
// - Input(2) [hdsize x vocab_size] weight matrix in, for speed-up, as per word matrix can be simply obtained as column slice
// - Input(3) [nbr_cls x T] clsprob in dense matrix in. This input, if applied softmax on, is the posterior probabilty of class given observations
// -----------------------------------------------------------------------
// calculates: -sum(left_i * log(softmax_i(right))) for class given history and for word given history
// need to provide class probabilty from external node
template <class ElemType>
@@ -1888,7 +2063,7 @@ template class LogisticNode<double>;
// -----------------------------------------------------------------------
template <class ElemType>
class DropoutNode : public ComputationNode<ElemType>, public NumInputs<1>
class DropoutNode : public ComputationNode<ElemType>, public NumInputs<1>, public RngUser
{
typedef ComputationNode<ElemType> Base;
UsingComputationNodeMembersBoilerplate;
@@ -1903,7 +2078,7 @@ public:
: Base(deviceId, name),
m_dropoutRate(0)
{
m_randomSeed = (unsigned long) CreateUniqId();
SetRandomSeed((unsigned long)CreateUniqId());
}
virtual void /*ComputationNode::*/ BackpropTo(const size_t inputIndex, const FrameRange& fr) override
@@ -1960,21 +2135,9 @@ public:
m_dropoutRate = val;
}
void SetRandomSeed(const unsigned long val)
{
m_randomSeed = (unsigned long) val;
// Upon change of the seed, reset RNGHandle to force the creation of a new RNGHandle
// during forward propagation
m_RNGHandle = nullptr;
}
RNGHandle& GetRNGHandle()
{
if (m_RNGHandle == nullptr)
m_RNGHandle = RNGHandle::Create(ValuePtr()->GetDeviceId(), m_randomSeed);
return *m_RNGHandle;
return RngUser::GetRNGHandle(ValuePtr()->GetDeviceId());
}
virtual void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@@ -1984,7 +2147,7 @@ public:
{
auto node = dynamic_pointer_cast<DropoutNode<ElemType>>(nodeP);
node->m_dropoutRate = m_dropoutRate;
node->m_randomSeed = m_randomSeed;
node->SetRandomSeed(m_randomSeed);
node->m_maskOfDropout = m_maskOfDropout;
}
}
@@ -2006,9 +2169,6 @@ public:
private:
double m_dropoutRate;
unsigned long m_randomSeed;
std::shared_ptr<RNGHandle> m_RNGHandle;
shared_ptr<Matrix<ElemType>> m_maskOfDropout;
};

View File

@@ -1247,7 +1247,7 @@ void Matrix<ElemType>::AssignValuesOf(const Matrix<ElemType>& deepCopyFrom)
deepCopyFrom.m_CPUSparseMatrix->AssignColumnSliceToDense(tempCPUDenseMatrix, 0, deepCopyFrom.GetNumCols());
m_GPUMatrix->SetValue(deepCopyFrom.GetNumRows(), deepCopyFrom.GetNumCols(), this->GetDeviceId(), tempCPUDenseMatrix.Data());
},//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_CPUSparseMatrix); },
{ LogicError("AssignValuesOf: Assigning a GPUSparseMatrix to a GPUMatrix is not yet implemented."); });//{ m_GPUMatrix->SetValue(*deepCopyFrom.m_GPUSparseMatrix); });
{ deepCopyFrom.m_GPUSparseMatrix->AssignColumnSliceToDense(*m_GPUMatrix, 0, deepCopyFrom.GetNumCols()); });
},
{
// Set CPUSparseMatrix from:
@@ -4471,12 +4471,25 @@ void Matrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const Matrix<ElemT
{
if (a.GetMatrixType() == MatrixType::SPARSE) // CPU, SPARSE * ANY -> ANY
{
if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * DENSE -> DENSE
{
CPUSparseMatrix<ElemType>::MultiplyAndWeightedAdd(alpha, *a.m_CPUSparseMatrix, transposeA, *b.m_CPUMatrix, transposeB, beta, *c.m_CPUMatrix);
c.SetDataLocation(CPU, DENSE);
}
else{
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::DENSE) // CPU, SPARSE * SPARSE -> DENSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::DENSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * DENSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else if (b.GetMatrixType() == MatrixType::SPARSE && c.GetMatrixType() == MatrixType::SPARSE)// CPU, SPARSE * SPARSE -> SPARSE
{
NOT_IMPLEMENTED;
}
else
{
NOT_IMPLEMENTED;
}
}

View File

@@ -19,7 +19,7 @@ inline size_t GetSizeByType(ElementType type)
case ElementType::tdouble:
return sizeof(double);
default:
RuntimeError("Unsupported type '%d'", type);
RuntimeError("Unsupported type '%d'", static_cast<int>(type));
}
}

View File

@@ -23,7 +23,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <class ElemType>
ReaderShim<ElemType>::ReaderShim(ReaderFactory factory)
: m_factory(factory), m_deviceId(CPUDEVICE), m_dataTransferers(2, DataTransfererPtr()), m_currentDataTransferIndex(0)
: m_factory(factory), m_deviceId(CPUDEVICE), m_dataTransferers(2, DataTransfererPtr()), m_currentDataTransferIndex(0), m_endOfEpoch(false)
{
}
template <class ElemType>
ReaderShim<ElemType>::ReaderShim(ReaderPtr reader)
: m_deviceId(CPUDEVICE), m_dataTransferers(2, DataTransfererPtr()), m_currentDataTransferIndex(0), m_reader(reader), m_factory(nullptr), m_endOfEpoch(false)
{
}
@@ -40,7 +46,9 @@ void ReaderShim<ElemType>::Init(const ConfigParameters& config)
m_numParallelSequences = numberOfuttsPerMinibatchForAllEpochs[0];
m_reader = m_factory(config);
if (!m_reader)
m_reader = m_factory(config);
m_streams = m_reader->GetStreamDescriptions();
for (auto i : m_streams)
{
@@ -63,12 +71,6 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
const std::unordered_set<InputStreamDescription>& inputs,
size_t requestedEpochSamples /*= requestDataSize*/)
{
// For adaptive minibatch, make sure there are no outstanding reads.
if (m_prefetchTask.valid())
{
m_prefetchTask.wait();
}
EpochConfiguration config;
config.m_workerRank = subsetNum;
config.m_numberOfWorkers = numSubsets;
@@ -76,6 +78,18 @@
config.m_totalEpochSizeInSamples = requestedEpochSamples;
config.m_epochIndex = epoch;
StartEpoch(config, inputs);
}
template <class ElemType>
void ReaderShim<ElemType>::StartEpoch(const EpochConfiguration& config, const std::unordered_set<InputStreamDescription>& inputs)
{
// For adaptive minibatch, make sure there are no outstanding reads.
if (m_prefetchTask.valid())
{
m_prefetchTask.wait();
}
// Let's check that there are no outstanding copies.
// Wait on all events if there are any pending copy operations in flight.
if (m_dataTransferers[m_currentDataTransferIndex])
@@ -114,7 +128,7 @@ void ReaderShim<ElemType>::StartDistributedMinibatchLoop(
m_prefetchBuffers[i.GetStreamName()] = StreamPrefetchBuffer
{
std::make_shared<Matrix<ElemType>>(0, 0, i.GetDeviceId(), i.GetMatrixType(), i.GetMatrixFormat()),
nullptr
std::make_shared<MBLayout>()
};
}
@@ -263,6 +277,10 @@ bool ReaderShim<ElemType>::GetMinibatch(StreamMinibatchInputs& matrices)
template <class ElemType>
typename ReaderShim<ElemType>::PrefetchResult ReaderShim<ElemType>::PrefetchMinibatch(size_t currentDataTransferIndex)
{
// Resetting layouts.
for (auto& mx : m_prefetchBuffers)
mx.second.m_mbLayout = std::make_shared<MBLayout>();
Minibatch minibatch = m_reader->ReadMinibatch();
// If there is no data we can simply return.

View File

@@ -29,6 +29,8 @@ class ReaderShim : public IDataReader
friend class ::CNTK::CompositeMinibatchSource;
public:
explicit ReaderShim(ReaderFactory factory);
explicit ReaderShim(ReaderPtr reader);
virtual ~ReaderShim() { }
virtual void Init(const ScriptableObjects::IConfigRecord& /*config*/) override
@@ -54,6 +56,8 @@ public:
virtual void StartMinibatchLoop(size_t mbSize, size_t epoch, const std::unordered_set<InputStreamDescription>& inputs, size_t requestedEpochSamples = requestDataSize) override;
virtual void StartDistributedMinibatchLoop(size_t requestedMBSize, size_t epoch, size_t subsetNum, size_t numSubsets, const std::unordered_set<InputStreamDescription>& inputs, size_t requestedEpochSamples) override;
void StartEpoch(const EpochConfiguration& epoch, const std::unordered_set<InputStreamDescription>& inputs);
virtual void StartMinibatchLoop(size_t, size_t, size_t) override
{
LogicError("Legacy StartMinibatchLoop is not implemented.");
@@ -84,6 +88,11 @@ public:
virtual size_t GetCurrentSamplePosition() override;
bool IsEndOfEpoch() const
{
return m_endOfEpoch;
}
private:
struct PrefetchResult
{

View File

@@ -6,6 +6,7 @@
#define _CRT_SECURE_NO_WARNINGS
#define _SCL_SECURE_NO_WARNINGS
#include <cmath>
#include <deque>
#include "TruncatedBpttPacker.h"
#include "ElementTypeUtils.h"
@@ -148,7 +149,7 @@ void TruncatedBPTTPacker::StartEpoch(const EpochConfiguration& config, const std
}
// Estimating the number of parallel sequences to pack (slots) from the minibatch size and truncation size.
m_numParallelSequences = max(1, (int)floor(m_minibatchSize / m_truncationSize));
m_numParallelSequences = max(1, static_cast<int>(std::floor(m_minibatchSize / m_truncationSize)));
if (config.m_numberOfWorkers > m_numParallelSequences)
{

View File

@@ -417,8 +417,9 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
// set dropout rate for this epoch
// We use the same seed across workers until parallel training kicks in to ensure that the workers have identical models
size_t parallelWorkerIdx = ((m_mpi == nullptr) || !UsingParallelTrain(i)) ? 0 : m_mpi->CurrentNodeRank();
size_t dropoutRandSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropoutRandSeedBase);
size_t randSeedBase = (parallelWorkerIdx * m_maxEpochs) + i;
ComputationNetwork::SetDropoutRate<ElemType>(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate);
ComputationNetwork::SetIRngUserSeed<ElemType>(net, criterionNodes[0], randSeedBase);
ComputationNetwork::SetBatchNormalizationTimeConstants<ElemType>(net, criterionNodes[0],
m_batchNormalizationTimeConstant[i], prevNormalizationTimeConstant,
m_batchNormalizationBlendTimeConstant[i], prevNormalizationBlendTimeConstant);

File diff suppressed because it is too large. Load Diff

View File

@@ -807,6 +807,23 @@ BOOST_AUTO_TEST_CASE(CompositeCNTKTextFormatReader_5x5_and_5x10_jagged_minibatch
false);
};
BOOST_AUTO_TEST_CASE(CNTKTextFormatReaderNoFirstMinibatchData)
{
HelperRunReaderTest<double>(
testDataPath() + "/Config/CNTKTextFormatReader/dense.cntk",
testDataPath() + "/Control/CNTKTextFormatReader/NonExistent.txt",
testDataPath() + "/Control/CNTKTextFormatReader/CNTKTextFormatReaderNoFirstMinibatchData_Output.txt",
"1x2",
"reader",
10, // epoch size
1, // mb size
10, // num epochs
1,
0,
1,
2);
};
BOOST_AUTO_TEST_SUITE_END()
} } } }

View File

@@ -113,7 +113,7 @@ declare -a mklFiles=("libmkl_cntk_p.so" "libiomp5.so")
declare -a opencvFiles=("libopencv_core.so.3.1" "libopencv_imgproc.so.3.1" "libopencv_imgproc.so.3.1" "libopencv_imgcodecs.so.3.1")
# libzip
declare -a libzipFiles=("libzip.so")
declare -a libzipFiles=("libzip.so.4")
# CUDA
declare -a cudaFiles=("libcudart.so.7.5" "libcublas.so.7.5" "libcurand.so.7.5" "libcusparse.so.7.5")

0
bindings/python/swig_install.sh Executable file → Normal file
View File