Conflicts:
	.gitignore
	Common/Include/fileutil.h
This commit is contained in:
ascii991218 2015-01-17 21:40:37 +08:00
Parent b5c41e6516 459ff2ded1
Commit 0443dc818d
28 changed files with 5202 additions and 148 deletions

.gitignore vendored
View File

@@ -166,3 +166,4 @@ bin/
LOG
*.log
core
*.lyx#

View File

@@ -31,6 +31,12 @@
#endif//__WINDOWS__
#include <stdio.h>
#ifdef MPI_SUPPORT
#include "mpi.h"
#endif
extern int myRank;
extern int numProcs;
// ---------------------------------------------------------------------------
// BestGpu class
// ---------------------------------------------------------------------------
@@ -86,6 +92,7 @@ public:
void Init();
void SetAllowedDevices(const std::vector<int>& devices); // only allow certain GPUs
bool DeviceAllowed(int device);
void DisallowDevice(int device) { m_allowedDevices &= ~(1 << device); } // clear this device's bit in the allowed-GPU bitmask
void AllowAll(); // reset to allow all GPUs (no allowed list)
bool UseMultiple(); // using multiple GPUs?
int GetDevice(BestGpuFlags flags = bestGpuNormal); // get a single device
@@ -120,8 +127,39 @@ DEVICEID_TYPE DeviceFromConfig(const ConfigParameters& config)
}
if (!_stricmp(val.c_str(), "Auto"))
{
#ifdef MPI_SUPPORT
// make sure deviceId is unique among processes on the same machine
g_bestGpu->AllowAll();
std::string MyName(getenv("COMPUTERNAME"));
for (int i = 0; i < numProcs; i++)
{
DEVICEID_TYPE yourDeviceId = deviceId;
if (myRank == i)
{
std::vector<int> devices = g_bestGpu->GetDevices(1);
deviceId = yourDeviceId = (DEVICEID_TYPE)devices[0];
}
MPI_Bcast(&yourDeviceId, 1, MPI_INT, i, MPI_COMM_WORLD);
{
INT32 YourSize = (INT32)MyName.length();
MPI_Bcast(&YourSize,1,MPI_INT,i,MPI_COMM_WORLD);
vector<char> YourName(YourSize+1);
if (myRank == i)
copy(MyName.begin(), MyName.end(), YourName.begin());
MPI_Bcast(YourName.data(), YourSize + 1, MPI_CHAR, i, MPI_COMM_WORLD);
if (myRank != i)
{
if (!_strcmpi(MyName.data(), YourName.data()))
{
g_bestGpu->DisallowDevice(yourDeviceId);
}
}
}
}
#else
std::vector<int> devices = g_bestGpu->GetDevices(1);
deviceId = (DEVICEID_TYPE)devices[0];
#endif
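// The loop above serializes device selection across ranks: on its turn, rank i
// picks the best remaining GPU, broadcasts the chosen deviceId and its
// COMPUTERNAME to every rank, and each other rank running on the same host
// removes that device from its allowed set, so processes sharing a machine
// never end up on the same GPU.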
}
else if (!_stricmp(val.c_str(), "All"))
{
@@ -466,6 +504,9 @@ void BestGpu::QueryNvmlData()
}
}
if (curPd == NULL)
continue;
// Get the memory usage, will only work for TCC drivers
result = nvmlDeviceGetMemoryInfo(device, &memory);
if (NVML_SUCCESS != result)

View File

@@ -379,6 +379,8 @@ template <> const wchar_t* GetFormatString(float);
template <> const wchar_t* GetFormatString(double);
template <> const wchar_t* GetFormatString(size_t);
template <> const wchar_t* GetFormatString(long long);
template <> const wchar_t* GetFormatString(const char*);
template <> const wchar_t* GetFormatString(const wchar_t*);
// GetScanFormatString - get the format string for a particular type
template <typename T>

View File

@@ -78,8 +78,10 @@ template <> const wchar_t* GetFormatString(unsigned int) {return L" %u";}
//template <> const wchar_t* GetFormatString(unsigned long) {return L" %lu";}
template <> const wchar_t* GetFormatString(float) {return L" %.9g";}
template <> const wchar_t* GetFormatString(double) {return L" %.17g";}
template <> const wchar_t* GetFormatString(size_t) { return L" %llu"; }
template <> const wchar_t* GetFormatString(size_t) {return L" %llu";}
template <> const wchar_t* GetFormatString(long long) {return L" %lli";}
template <> const wchar_t* GetFormatString(const char*) {return L" %hs";}
template <> const wchar_t* GetFormatString(const wchar_t*) {return L" %ls";}
// ----------------------------------------------------------------------------
// fgetText() specializations for fwscanf differences: get a value from a text file

View File

@@ -50,6 +50,17 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_convertLabelsToTargets = false;
m_numberOfuttsPerMinibatch = readerConfig("nbruttsineachrecurrentiter", "1");
if (m_numberOfuttsPerMinibatch < 1)
{
LogicError("nbrUttsInEachRecurrentIter cannot be less than 1.");
}
if (!m_truncated && m_numberOfuttsPerMinibatch != 1)
{
LogicError("nbrUttsInEachRecurrentIter has to be 1 if Truncated is set to false.");
}
m_actualnumberOfuttsPerMinibatch = m_numberOfuttsPerMinibatch;
m_sentenceEnd.assign(m_numberOfuttsPerMinibatch, true);
m_processedFrame.assign(m_numberOfuttsPerMinibatch, 0);
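// Illustrative reader settings that satisfy the checks above (hypothetical values):
//   Truncated = true
//   nbruttsineachrecurrentiter = 4
// With Truncated = false, only nbruttsineachrecurrentiter = 1 is accepted.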

View File

@@ -64,6 +64,7 @@ template<class ElemType>
void CNTKEval<ElemType>::LoadModel(const std::wstring& modelFileName)
{
DEVICEID_TYPE deviceId = DeviceFromConfig(m_config);
fprintf(stderr, "DeviceID=%d\n", (int)deviceId);
if (m_net != NULL)
delete m_net;
m_net = new ComputationNetwork<ElemType>(deviceId);

View File

@@ -118,9 +118,10 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
//dump all nodes in the network to file
void DumpAllNodesToFile(const bool printValues, const std::wstring outputFile)
void DumpAllNodesToFile(const bool printValues, const std::wstring outputFile, const bool validateBeforeDump = true)
{
ValidateNetwork(); //some internal values in the nodes are computed during validation
if (validateBeforeDump)
ValidateNetwork(); //some internal values in the nodes are computed during validation
File fstream(outputFile, FileOptions::fileOptionsText | FileOptions::fileOptionsWrite);
@@ -1745,8 +1746,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
for (ComputationNodePtr node : FinalCriterionNodes())
{
PrintComputationTree(node, false);
if(!allowFragment) FormRecurentLoops(node);
PrintComputationTree(node, false);
size_t actualMBSize = this->GetActualMBSize();
this->SetActualMiniBatchSize(actualMBSize);
ValidateNetwork(node);
@@ -1759,8 +1760,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// now output nodes
if (OutputNodes().size() > 0)
{
for (ComputationNodePtr node : OutputNodes())
ValidateNetwork(node);
for (ComputationNodePtr node : OutputNodes())
{
if (!allowFragment) FormRecurentLoops(node);
ValidateNetwork(node);
}
}
else if (!allowFragment)
{
@@ -1769,8 +1773,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// now evaluation nodes
if (EvaluationNodes().size() > 0)
{
for (ComputationNodePtr node : EvaluationNodes())
ValidateNetwork(node);
for (ComputationNodePtr node : EvaluationNodes())
{
if (!allowFragment) FormRecurentLoops(node);
ValidateNetwork(node);
}
}
}
@@ -2039,6 +2046,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::vector<ComputationNodePtr> sourceLoopNodes;
getStrongSCC(rootNode);
std::list<ComputationNodePtr>& nodes = GetEvalOrder(rootNode, sourceLoopNodes);
std::list<ComputationNodePtr> nodesForGrad;
/// for debugging purposes
for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++)
@@ -2080,7 +2088,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (auto iter = m_recurrentInfo.begin(); iter != m_recurrentInfo.end(); iter++)
{
// sort the recurrent nodes in their ascending name, which is the same as visiting nodes in G^R
if ((*iter).m_recurrentNodes.size() > 1 && (*iter).m_recurrentNodesForForward.size() == 0)
(*iter).m_recurrentNodesForForward.clear();
if ((*iter).m_recurrentNodes.size() > 1)
{
std::list<ComputationNodePtr> result;
std::unordered_set<ComputationNodePtr> visited;
@@ -2112,7 +2121,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
result.pop_front();
}
(*iter).m_recurrentNodes = (*iter).m_recurrentNodesForForward;
}
}
@@ -2124,12 +2133,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::list<ComputationNodePtr> noRecurrentNodes;
noRecurrentNodes = rootNode->ReshuffleNodes(recurrentNodes);
ReorderLoops(nodes, recurrentNodes, noRecurrentNodes);
nodes.sort(IsSmaller);
ReorderLoops(nodes, recurrentNodes, noRecurrentNodes);
m_cacheEvalOrders[rootNode] = nodes;
nodesForGrad = nodes;
nodesForGrad.reverse();
m_cacheGradientCalcOrders[rootNode] = nodesForGrad;
#ifdef DISPLAY_DEBUG
fprintf(stderr, "Reordered nodes\n");
@@ -2149,13 +2161,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
std::list<ComputationNodePtr> vTmp;
std::list<ComputationNodePtr> vRecurrentTmp;
int prevId = -1;
//int prevId = -1;
vector<bool> accessed;
accessed.assign(m_recurrentInfo.size(),false);
for (auto nodeIter=nodes.begin(); nodeIter != nodes.end(); nodeIter++)
{
int iId = FindInRecurrentLoop(*nodeIter);
if (iId >= 0)
{
if (prevId != iId && vRecurrentTmp.size() > 0)
if (! accessed[iId])
{
newList.insert(newList.end(), m_recurrentInfo[iId].m_recurrentNodes.begin(), m_recurrentInfo[iId].m_recurrentNodes.end());
accessed[iId] = true;
}
/*if (prevId != iId && vRecurrentTmp.size() > 0)
{
newList.insert(newList.end(), vRecurrentTmp.begin(), vRecurrentTmp.end());
vRecurrentTmp.clear();
@@ -2169,11 +2190,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
vRecurrentTmp.push_back(*nodeIter);
prevId = iId;
prevId = iId;*/
}
else
{
vTmp.push_back(*nodeIter);
//vTmp.push_back(*nodeIter);
newList.push_back(*nodeIter);
}
}

View File

@@ -4743,14 +4743,18 @@ protected: \
virtual void EvaluateThisNode(const size_t timeIdxInSeq)
{
Matrix<ElemType> sliceInput0Value = Inputs(0)->FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = m_functionValues.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
Matrix<ElemType> sliceOutputValue = Matrix<ElemType>();
Matrix<ElemType> sliceMask = Matrix<ElemType>();
if(m_dropoutRate > 0)
{
m_maskOfDropout.Resize(m_functionValues.GetNumRows(), m_functionValues.GetNumCols());
FunctionValues().Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
m_maskOfDropout.Resize(Inputs(0)->FunctionValues().GetNumRows(), Inputs(0)->FunctionValues().GetNumCols());
sliceMask = m_maskOfDropout.ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
}
}
sliceOutputValue = FunctionValues().ColumnSlice(timeIdxInSeq * m_samplesInRecurrentStep, m_samplesInRecurrentStep);
EvaluateThisNodeS(m_dropoutRate, m_randomSeed, sliceOutputValue, sliceMask, sliceInput0Value);
}

View File

@@ -45,10 +45,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
IExecutionEngine<ElemType>* executionEngine,
const std::wstring& networkConfig,
const std::string& configParams,
const std::wstring& dumpFileName,
DEVICEID_TYPE deviceId=AUTOPLACEMATRIX)
{
m_executionEngine=executionEngine;
m_networkConfig=networkConfig;
m_dumpFileName = dumpFileName;
m_initialConfig=configParams;
m_deviceId=deviceId;
m_net=&(executionEngine->GetComputationNetwork());
@@ -69,6 +71,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
ConfigParameters newConfig;
ConfigValue networkConfig = config("networkDescription","");
ConfigValue dumpFileName = config("dumpFileName", "");
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
unsigned long randomSeedOffset = config("randomSeedOffset","0");
auto executionEngine = new SynchronousExecutionEngine<ElemType>(deviceId, randomSeedOffset);
@@ -142,7 +145,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
}
Init(executionEngine, networkConfig, newConfig, deviceId);
Init(executionEngine, networkConfig, newConfig, dumpFileName, deviceId);
}
virtual ~NDLBuilder()
@@ -196,7 +199,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_script.FileParse(fileContents);
NDLUtil<ElemType> ndlUtil(m_net);
ndlUtil.ProcessNDLScript(&m_script, ndlPassAll, nullptr, true);
ndlUtil.ProcessNDLScript(&m_script, ndlPassAll, nullptr, true, m_dumpFileName);
}
// SetFromConfig - Set the NDL script from a configuration string value
@@ -222,6 +225,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
ComputationNetwork<ElemType>* m_net;
IExecutionEngine<ElemType>* m_executionEngine;
std::wstring m_networkConfig;
std::wstring m_dumpFileName;
std::string m_initialConfig;
DEVICEID_TYPE m_deviceId;

View File

@@ -88,7 +88,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// skipThrough - [in/out] for iterative processing, a pointer to an array of NDLNode*, one for each pass
// the pointer will be updated to last node processed for that pass, can be NULL if all node processing is desired
// fullValidate - validate as a complete network? (false if this might be a snippet of a full network)
void ProcessNDLScript(NDLScript<ElemType>* script, NDLPass ndlPassUntil=ndlPassAll, NDLNode<ElemType>** skipThrough=nullptr, bool fullValidate = false)
void ProcessNDLScript(NDLScript<ElemType>* script, NDLPass ndlPassUntil = ndlPassAll, NDLNode<ElemType>** skipThrough = nullptr, bool fullValidate = false, const std::wstring& dumpFileName = L"")
{
// if we don't have a script yet, don't bother
if (script == nullptr)
@@ -104,7 +104,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (NDLPass ndlPass=ndlPassInitial;ndlPass <= ndlPassUntil;++ndlPass)
{
NDLNode<ElemType>* skipThroughNode = skipThrough?*skipThrough:nullptr;
lastNode = ProcessPassNDLScript(script, ndlPass, skipThroughNode, fullValidate);
lastNode = ProcessPassNDLScript(script, ndlPass, skipThroughNode, fullValidate, dumpFileName);
if (skipThrough)
{
*skipThrough = lastNode;
@@ -119,13 +119,15 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// skipThrough - for iterative processing, skip through this node in the script (used for in-line MEL processing)
// fullValidate - validate as a complete network? (false if this might be a snippet of a full network)
// returns: last NDL node processed
NDLNode<ElemType>* ProcessPassNDLScript(NDLScript<ElemType>* script, NDLPass ndlPass, NDLNode<ElemType>* skipThrough=nullptr, bool fullValidate = false)
NDLNode<ElemType>* ProcessPassNDLScript(NDLScript<ElemType>* script, NDLPass ndlPass, NDLNode<ElemType>* skipThrough = nullptr, bool fullValidate = false, const std::wstring& dumpFileName = L"")
{
if (ndlPass == ndlPassFinal)
{
// make sure to clear the caches so we pick up the new nodes
m_net->ClearCaches();
// validate the network
if (dumpFileName != L"")
m_net->DumpAllNodesToFile(false, dumpFileName, false);
m_net->ValidateNetwork(!fullValidate);
}
SynchronousNodeEvaluator<ElemType> ndlEvaluator(*m_net);

View File

@@ -31,15 +31,33 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void DecimateMinibatch(std::map<std::wstring, MSR::CNTK::Matrix<ElemType>*> &mb)
{
size_t rv = 0;
if ( numProcs > 1 ) for (auto it = mb.begin(); it != mb.end(); ++it)
{
MSR::CNTK::Matrix<ElemType> &mat = *(it->second);
size_t nCols = mat.GetNumCols();
size_t col_start = (nCols * myRank)/ numProcs;
size_t col_start = (nCols * myRank) / numProcs;
size_t col_end = (nCols*(myRank + 1)) / numProcs;
if (col_end > nCols) col_end = nCols; // this shouldn't happen
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
mat.SetValue(tmp);
if (col_end == col_start)
{
MSR::CNTK::Matrix<ElemType> tmp(mat.GetNumRows(), 0, AUTOPLACEMATRIX, DENSE);
mat.SetValue(tmp);
}
else
{
MSR::CNTK::Matrix<ElemType> tmp = mat.ColumnSlice(col_start, col_end - col_start);
mat.SetValue(tmp);
}
if (0 == rv)
{
rv = mat.GetNumCols();
}
else
{
if (rv != mat.GetNumCols())
throw std::logic_error("Uneven number of columns among inputs.");
}
}
}
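// Worked example of the column split above: with nCols = 10 and numProcs = 4,
// integer division assigns rank 0 -> [0,2), rank 1 -> [2,5), rank 2 -> [5,7),
// rank 3 -> [7,10), so every column goes to exactly one rank and slice widths
// differ by at most one.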
@@ -537,9 +555,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
net.BuildPTaskGraph();
}
for (int i=int(startEpoch); i<int(m_maxEpochs); i++)
for (int i = int(startEpoch); i < int(m_maxEpochs); i++)
{
auto t_start_epoch = clock();
auto t_start_epoch = clock();
// set other information to inputMatrices that can contain information
// used for class-based LM for clustering information
@@ -547,24 +565,24 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//set dropout rate
SetDropoutRate(net, criterionNodes[0], m_dropoutRates[i], prevDropoutRate, dropOutSeed);
//learning rate adjustment
if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::None || (m_learningRatesPerSample.size() > 0 && m_learningRatesPerSample.size() > i))
{
learnRatePerSample = m_learningRatesPerSample[i];
{
learnRatePerSample = m_learningRatesPerSample[i];
setMomentum(m_momentumInputPerMB[i]);
}
else if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch)
}
else if (m_autoLearnRateSearchType == LearningRateSearchAlgorithm::SearchBeforeEpoch)
{
ElemType largestPrevLearnRatePerSample = prevLearnRates[0];
for (int j=1; j<m_numPrevLearnRates; j++)
for (int j = 1; j < m_numPrevLearnRates; j++)
{
largestPrevLearnRatePerSample = max(largestPrevLearnRatePerSample, prevLearnRates[j]);
}
//return a reasonable learning rate based on the initial mbsize
learnRatePerSample = SearchLearnRateBeforeEpoch(net, refNet, refNode, i, learnRatePerSample, trainSetDataReader, FeatureNodes,
labelNodes,criterionNodes,evaluationNodes, inputMatrices,learnableNodes,smoothedGradients, learnRateInitialized, largestPrevLearnRatePerSample);
labelNodes, criterionNodes, evaluationNodes, inputMatrices, learnableNodes, smoothedGradients, learnRateInitialized, largestPrevLearnRatePerSample);
prevLearnRates[i % m_numPrevLearnRates] = learnRatePerSample; //save per sample learn rate to support changeable mbsize
}
@@ -573,18 +591,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
if (learnRatePerSample < m_minLearnRate)
{
fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training stops.\n", i+1, learnRatePerSample, m_minLearnRate);
fprintf(stderr, "Learn Rate Per Sample for Epoch[%d] = %.8g is less than minLearnRate %.8g. Training stops.\n", i + 1, learnRatePerSample, m_minLearnRate);
if (m_autoLearnRateSearchType != LearningRateSearchAlgorithm::None)
net.SaveToFile(m_modelPath);
break;
}
TrainOneEpoch(net, refNet, refNode, i, m_epochSize, trainSetDataReader, learnRatePerSample,FeatureNodes,labelNodes,
criterionNodes,evaluationNodes,inputMatrices, learnableNodes,smoothedGradients,
#ifdef MPI_SUPPORT
INT32 mySamples = (INT32)
#endif
TrainOneEpoch(net, refNet, refNode, i, m_epochSize, trainSetDataReader, learnRatePerSample, FeatureNodes, labelNodes,
criterionNodes, evaluationNodes, inputMatrices, learnableNodes, smoothedGradients,
epochCriterion, epochEvalErrors, totalSamplesSeen);
auto t_end_epoch = clock();
ElemType epochTime = ElemType(1.0)*(t_end_epoch-t_start_epoch)/(CLOCKS_PER_SEC);
ElemType epochTime = ElemType(1.0)*(t_end_epoch - t_start_epoch) / (CLOCKS_PER_SEC);
fprintf(stderr, "Finished Epoch[%d]: [Training Set] Train Loss Per Sample = %.8g ", i + 1, epochCriterion);
if (epochEvalErrors.size() == 1)
@@ -604,21 +625,34 @@ namespace Microsoft { namespace MSR { namespace CNTK {
#ifdef MPI_SUPPORT
// model reduction and averaging
if ( numProcs > 0 )
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
if (numProcs > 0)
{
ComputationNodePtr node = (*nodeIter);
Microsoft::MSR::CNTK::Matrix<ElemType> &mat = node->FunctionValues();
ElemType *px = mat.CopyToArray();
size_t nx = mat.GetNumElements();
vector<ElemType> py = vector<ElemType>(nx, ElemType(0));
// TODO: Replace this with the reduction-shuffle-dance
MPI_Reduce(px, &(py[0]), (int)nx, sizeof(ElemType) == 4 ? MPI_FLOAT : MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myRank == 0)
transform(py.begin(), py.end(), py.begin(), [](ElemType&val)->ElemType{return val / (ElemType)numProcs; });
MPI_Bcast(&(py[0]), nx, sizeof(ElemType) == 4 ? MPI_FLOAT : MPI_DOUBLE, 0, MPI_COMM_WORLD);
mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), &(py[0]));
delete px;
ElemType factor; // weight for the parameter of my model
{
// compute total minibatch size
INT32 allSamples = 0;
MPI_Allreduce(&mySamples, &allSamples, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
if (allSamples == 0) allSamples = 1;
factor = (ElemType)mySamples / (ElemType)allSamples;
}
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
{
ComputationNodePtr node = (*nodeIter);
Microsoft::MSR::CNTK::Matrix<ElemType> &mat = node->FunctionValues();
// weight model by relative size of minibatch samples (and number of processors, for averaging)
ElemType *px = mat.CopyToArray();
size_t nx = mat.GetNumElements();
transform(px, px + nx, px, [factor](ElemType&val)->ElemType{return val * factor; });
// TODO: Replace default Allreduce with the reduction-shuffle-dance
vector<ElemType> py = vector<ElemType>(nx, ElemType(0));
MPI_Allreduce(px, &(py[0]), (int)nx, sizeof(ElemType) == 4 ? MPI_FLOAT : MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), &(py[0]));
delete[] px; // CopyToArray() allocates with new[], so use array delete
}
}
#endif
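// Numeric sketch of the weighting above: if two ranks processed 300 and 100
// samples, allSamples = 400, so factor is 0.75 and 0.25 respectively; the
// MPI_Allreduce of the pre-scaled parameters then leaves every rank holding
// the sample-weighted average of the models, with no separate broadcast step.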
@@ -932,7 +966,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
LoadCheckPointInfo(baseModelEpoch, totalSamplesSeen, learnRate, smoothedGradients, prevCriterion);
}
void TrainOneEpoch(ComputationNetwork<ElemType>& net, ComputationNetwork<ElemType>& refNet, const ComputationNodePtr refNode,
size_t TrainOneEpoch(ComputationNetwork<ElemType>& net, ComputationNetwork<ElemType>& refNet, const ComputationNodePtr refNode,
const int epochNumber, const size_t epochSize,
IDataReader<ElemType>* trainSetDataReader, const ElemType learnRatePerSample,
const std::vector<ComputationNodePtr>& FeatureNodes,
@@ -1006,6 +1040,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
UpdateEvalTimeStamps(labelNodes);
size_t actualMBSize = net.GetActualMBSize();
if (0 == actualMBSize)
continue;
net.SetActualMiniBatchSize(actualMBSize);
net.SetActualNbrSlicesInEachRecIter(trainSetDataReader->NumberSlicesInEachRecurrentIter());
@@ -1151,6 +1187,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
epochEvalErrors[i] = (const ElemType)localEpochEvalErrors(0,i);
}
}
return totalEpochSamples;
}
public:
// UpdateWeightsS - static version of UpdateWeights()

View File

@@ -200,12 +200,6 @@
</CustomBuildStep>
</ItemDefinitionGroup>
<ItemGroup>
<Text Include="config.txt">
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</DeploymentContent>
<DeploymentContent Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</DeploymentContent>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
</Text>
<Text Include="DefaultMacros.txt" />
<Text Include="modelEditor.txt" />
<Text Include="modelEditorFromScratch.txt" />

View File

@@ -146,9 +146,6 @@
<Text Include="modelEditorFromScratch.txt">
<Filter>Model Editing</Filter>
</Text>
<Text Include="config.txt">
<Filter>Main</Filter>
</Text>
<Text Include="DefaultMacros.txt">
<Filter>Main</Filter>
</Text>

View File

@@ -95,6 +95,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_numRows = 0;
m_numCols = 0;
m_elemSizeAllocated = 0;
m_compIndexSize = 0;
m_externalBuffer = false;
m_computeDevice = CPUDEVICE;
m_nz = 0;
@@ -181,11 +182,13 @@ namespace Microsoft { namespace MSR { namespace CNTK {
throw std::logic_error("CPUSparseMatrix: unsupported SetValue() call.");
}
if(m_elemSizeAllocated < m_nz +1) {
throw std::logic_error("CPUSparseMatrix: allocated size is too small.");
if(m_elemSizeAllocated < m_nz +1) //automatic resize
{
Resize(m_numRows, m_numCols, m_nz + 100); //allocate 100 more elements and keep existing values
}
if(rIdx < 0 || rIdx >= m_numRows) {
if(rIdx < 0 || rIdx >= m_numRows)
{
throw std::logic_error("CPUSparseMatrix: SetValue() invalid row id");
}
@@ -228,43 +231,62 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void CPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, size_t size)
void CPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, size_t numNZElemToReserve, const bool growOnly, const bool keepExistingValues)
{
m_nz = 0;
m_colIdx = -1;
size_t newCompIndexSize = (numCols > numRows ? numCols : numRows) + 1;
bool reallocate = (m_elemSizeAllocated < numNZElemToReserve || (m_elemSizeAllocated > numNZElemToReserve && !growOnly) || m_compIndexSize < newCompIndexSize);
m_numRows = numRows;
m_numCols = numCols;
if(m_elemSizeAllocated < size)
m_numCols = numCols;
if (reallocate)
{
m_elemSizeAllocated = size;
if(m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
if (m_format == MatrixFormat::matrixFormatSparseCSC || m_format == MatrixFormat::matrixFormatSparseCSR)
{
if(m_pArray != NULL)
ElemType *pArray = new ElemType[numNZElemToReserve];
size_t *unCompIndex = new size_t[numNZElemToReserve];
size_t *compIndex = new size_t[newCompIndexSize];
if (keepExistingValues && m_nz > 0)
{
memcpy(pArray, m_pArray, sizeof(ElemType)*m_nz);
memcpy(unCompIndex, m_unCompIndex, sizeof(size_t)*m_nz);
memcpy(compIndex, m_compIndex, sizeof(size_t)*m_compIndexSize);
}
if (m_pArray != NULL)
delete[] m_pArray;
if(m_unCompIndex != NULL)
if (m_unCompIndex != NULL)
delete[] m_unCompIndex;
if(m_compIndex != NULL)
delete[] m_compIndex;
//int len = m_format == MatrixFormat::matrixFormatSparseCSC ? numCols : numRows;
size_t len = numCols > numRows ? numCols : numRows;
m_pArray = new ElemType[size];
m_unCompIndex = new size_t[size];
m_compIndex = new size_t[len+1];
}
if (m_compIndex != NULL)
delete[] m_compIndex;
m_pArray = pArray;
m_unCompIndex = unCompIndex;
m_compIndex = compIndex;
}
else if(m_format == MatrixFormat::matrixFormatSparseBlockCol || m_format == MatrixFormat::matrixFormatSparseBlockRow)
{
if(m_blockVal != NULL)
ElemType *blockVal = new ElemType[numNZElemToReserve];
size_t *blockIds = new size_t[newCompIndexSize];
if (keepExistingValues && m_elemSizeAllocated > 0)
{
memcpy(blockVal, m_blockVal, sizeof(ElemType)*m_elemSizeAllocated);
memcpy(blockIds, m_blockIds, sizeof(size_t)*m_compIndexSize);
}
if (m_blockVal != NULL)
delete[] m_blockVal;
if(m_blockIds != NULL)
delete[] m_blockIds;
size_t max = numCols > numRows ? numCols : numRows;
m_blockVal = new ElemType[size];
m_blockIds = new size_t[max];
m_blockVal = blockVal;
m_blockIds = blockIds;
}
m_elemSizeAllocated = numNZElemToReserve;
m_compIndexSize = newCompIndexSize;
}
}
@@ -274,6 +296,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
m_nz = 0;
m_colIdx = -1;
m_compIndexSize = 0;
m_blockSize = 0;
}

View File

@@ -86,7 +86,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
int GetComputeDeviceId() const {return -1;}
void Resize(const size_t numRows, const size_t numCols, size_t size = 0);
void Resize(const size_t numRows, const size_t numCols, size_t numNZElemToReserve = 0, const bool growOnly = true, const bool keepExistingValues = true);
void Reset();
public:
@@ -133,6 +133,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
private:
int m_colIdx; //used to SetValue()
size_t m_compIndexSize;
//non-zero values are stored in m_pArray
size_t *m_unCompIndex; //row/col ids in CSC/CSR format
size_t *m_compIndex; //begin ids of col/row in CSC/CSR format

View File

@@ -85,6 +85,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void SetOwnBuffer(bool own) {m_externalBuffer = !own;}
wchar_t* GetMatrixName() const { return m_matrixName; }
size_t NzCount() const {return m_nz;}
void SetNzCount(const size_t nz) { m_nz = nz; }
size_t GetSizeAllocated() const {return m_elemSizeAllocated; }
void SetMatrixName(const wchar_t* s)
{

View File

@@ -130,6 +130,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
deepCopy.PrepareDevice();
Resize(deepCopy.m_numRows, deepCopy.m_numCols, deepCopy.m_nz, deepCopy.m_format);
m_nz = deepCopy.m_nz;
CUDACALL(cudaMemcpy(NzValues(), deepCopy.NzValues(), NzSize(), cudaMemcpyDeviceToDevice));
CUDACALL(cudaMemcpy(MajorIndexLocation(), deepCopy.MajorIndexLocation(), MajorIndexSize(), cudaMemcpyDeviceToDevice));
CUDACALL(cudaMemcpy(SecondaryIndexLocation(), deepCopy.SecondaryIndexLocation(), SecondaryIndexSize(), cudaMemcpyDeviceToDevice));
@@ -199,6 +200,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//we need to do conversion because CPUSparseMatrix uses size_t for indexes while GPUSparseMatrix uses int
GPUSPARSE_INDEX_TYPE *h_CSRRow, *h_Col;
cpuSparseMatrix.Resize(GetNumRows(), GetNumCols(), GetNumNZElements());
cpuSparseMatrix.SetNzCount(GetNumNZElements());
PrepareDevice();
h_CSRRow = new GPUSPARSE_INDEX_TYPE[m_numRows + 1];
@@ -219,6 +221,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
//we need to do conversion because CPUSparseMatrix uses size_t for indexes while GPUSparseMatrix uses int
GPUSPARSE_INDEX_TYPE *h_CSCCol, *h_Row;
cpuSparseMatrix.Resize(GetNumRows(), GetNumCols(), GetNumNZElements());
cpuSparseMatrix.SetNzCount(GetNumNZElements());
PrepareDevice();
h_CSCCol = new GPUSPARSE_INDEX_TYPE[m_numCols + 1];
@@ -322,6 +325,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
outMatrix.ChangeDeviceTo(GetComputeDeviceId());
outMatrix.Resize(m_numRows, m_numCols, m_nz,newFormat);
outMatrix.SetNzCount(m_nz);
if (oldFormat == matrixFormatSparseCSR && newFormat == matrixFormatSparseCSC)
{
@@ -475,6 +479,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
CUDACALL(cudaEventDestroy(done));
Resize(numRows, numCols, nnzTotalDevHostPtr, matrixFormat);
SetNzCount(nnzTotalDevHostPtr);
CUDACALL(cudaEventCreate(&done));
@@ -605,6 +610,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void GPUSparseMatrix<ElemType>::ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly /*= true*/)
{
Resize(a.m_numRows, a.m_numCols, a.m_nz, a.m_format, growOnly);
SetNzCount(a.m_nz);
CUDACALL(cudaMemcpy(MajorIndexLocation(), a.MajorIndexLocation(), MajorIndexSize(), cudaMemcpyDeviceToDevice));
CUDACALL(cudaMemcpy(SecondaryIndexLocation(), a.SecondaryIndexLocation(), SecondaryIndexSize(), cudaMemcpyDeviceToDevice));
@@ -630,30 +636,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZ)
void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly)
{
Resize(numRows, numCols, numNZ, GetFormat(), true);
Resize(numRows, numCols, numNZElemToReserve, GetFormat(), growOnly);
}
//WARNING: When memory is reallocated, existing information will be lost; the workaround is to allocate enough memory from the start.
//TODO: add keepExistingValues (default to true) argument so that the existing values are kept even after reallocation
template<class ElemType>
void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat, const bool growOnly /*= true*/)
void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly /*= true*/)
{
m_numRows = numRows;
m_numCols = numCols;
m_nz = numNZ;
if (matrixFormat == MatrixFormat::matrixFormatSparseCSC || matrixFormat == MatrixFormat::matrixFormatSparseCSR)
{
bool reallocate = (m_totalBufferSizeAllocated < BufferSizeNeeded() || (!growOnly && m_totalBufferSizeAllocated > BufferSizeNeeded()));
size_t bufferSizeNeeded = BufferSizeNeeded(numNZElemToReserve);
bool reallocate = (m_totalBufferSizeAllocated < bufferSizeNeeded || (!growOnly && m_totalBufferSizeAllocated > bufferSizeNeeded));
if (reallocate)
{
if (!OwnBuffer())
throw logic_error("Cannot Resize since the buffer is managed externally.");
m_totalBufferSizeAllocated = BufferSizeNeeded();
m_elemSizeAllocated = numNZ;
if (m_pArray != nullptr)
CUDACALL(cudaFree(m_pArray));
if (m_block2Id != nullptr)
@@ -663,21 +668,29 @@ namespace Microsoft { namespace MSR { namespace CNTK {
PrepareDevice();
CUDACALL(cudaMalloc((void **)&m_pArray, m_totalBufferSizeAllocated));
CUDACALL(cudaMalloc((void **)&m_pArray, bufferSizeNeeded));
CUDACALL(cudaMalloc((void **)&m_block2Id, sizeof(size_t)*(numCols * 2)));
CUDACALL(cudaMalloc((void **)&m_block2UniqId, sizeof(size_t)*(numCols * 2)));
m_totalBufferSizeAllocated = bufferSizeNeeded;
m_elemSizeAllocated = numNZElemToReserve;
}
}
else if (matrixFormat == MatrixFormat::matrixFormatSparseBlockCol || matrixFormat == MatrixFormat::matrixFormatSparseBlockRow)
{
if (m_blockVal != nullptr)
CUDACALL(cudaFree(m_blockVal));
if (m_blockIds != nullptr)
CUDACALL(cudaFree(m_blockIds));
PrepareDevice();
CUDACALL(cudaMalloc((void **)&m_blockVal, sizeof(ElemType)*numNZ));
int max = numCols > numRows ? numCols : numRows;
CUDACALL(cudaMalloc((void **)&m_blockIds, sizeof(size_t)*max));
if (m_elemSizeAllocated < numNZElemToReserve || (m_elemSizeAllocated > numNZElemToReserve && !growOnly))
{
if (m_blockVal != nullptr)
CUDACALL(cudaFree(m_blockVal));
if (m_blockIds != nullptr)
CUDACALL(cudaFree(m_blockIds));
PrepareDevice();
CUDACALL(cudaMalloc((void **)&m_blockVal, sizeof(ElemType)*numNZElemToReserve));
int max = numCols > numRows ? numCols : numRows;
CUDACALL(cudaMalloc((void **)&m_blockIds, sizeof(size_t)*max));
m_elemSizeAllocated = numNZElemToReserve;
}
}
else
NOT_IMPLEMENTED;
@@ -701,6 +714,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
m_format = matrixFormatSparseCSR;
Resize(numRows, numCols, nz);
SetNzCount(nz);
cudaMemcpyKind kind = IsOnDevice ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
CUDACALL(cudaMemcpy(RowLocation(), h_CSRRow, RowSize(), kind));
@@ -741,6 +755,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
SetComputeDeviceId(devId);
m_format = matrixFormatSparseCSC;
Resize(numRows, numCols, nz);
SetNzCount(nz);
cudaMemcpyKind kind = IsOnDevice ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice;
CUDACALL(cudaMemcpy(RowLocation(), h_Row, RowSize(), kind));
@@ -792,6 +807,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
m_format = matrixFormatSparseCSC;
Resize(m_numRows, m_numCols, labelSize);
SetNzCount(labelSize);
m_expandedSize = expandedSize;
m_blockSize = blockSize;
@@ -1320,6 +1336,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// now we know the number of Non-zeros in the result set, set the output size
c.Resize(m, n, nnzC);
c.m_nz = nnzC;
CUDACALL(cudaMemcpy(c.SecondaryIndexLocation(),csrRowPtrC,c.SecondaryIndexSize(),cudaMemcpyDeviceToDevice));
// if we allocated the buffer, free it here
@@ -1805,6 +1823,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
PrepareDevice();
GPUSparseMatrix c(GetFormat(), GetComputeDeviceId());
c.Resize(n, m, nnz, GetFormat());
c.m_nz = nnz;
cusparseHandle_t cusparseHandle = 0;
CUSPARSECALL(cusparseCreate(&cusparseHandle));
@@ -2283,6 +2302,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
NOT_IMPLEMENTED;
us.Resize(rownum, colnum, nz);
us.SetNzCount(nz);
if (nz > 0)
{

View File

@@ -77,7 +77,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
size_t MajorIndexSize() const { return sizeof(GPUSPARSE_INDEX_TYPE)*MajorIndexCount(); } // actual number of major index bytes in use
GPUSPARSE_INDEX_TYPE* SecondaryIndexLocation() const { return MajorIndexLocation() + m_elemSizeAllocated; } //this is the compressed index, col/row in CSC/CSR format
size_t SecondaryIndexCount() const
size_t SecondaryIndexCount(const size_t numNZ) const
{
if (m_format&matrixFormatCompressed)
{
@@ -86,12 +86,21 @@ namespace Microsoft { namespace MSR { namespace CNTK {
return cnt;
}
else
return m_nz; // COO format
return numNZ; // COO format
}
size_t SecondaryIndexCount() const
{
return SecondaryIndexCount(m_nz);
}
// get size for compressed index
size_t SecondaryIndexSize() const { return (SecondaryIndexCount())*sizeof(GPUSPARSE_INDEX_TYPE); }
size_t BufferSizeNeeded() const { return NzSize() + MajorIndexSize() + SecondaryIndexSize(); }
size_t BufferSizeNeeded(const size_t numNZ) const
{ return sizeof(ElemType)*numNZ + sizeof(GPUSPARSE_INDEX_TYPE)*(numNZ + SecondaryIndexCount(numNZ)); }
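// Rough sizing sketch (assuming CSC format, 4-byte elements and 4-byte
// GPUSPARSE_INDEX_TYPE): reserving numNZ = 1000 nonzeros in a 50-column matrix
// needs 1000*4 (values) + 1000*4 (row indices) + 51*4 (column offsets) bytes.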
size_t BufferSizeAllocated() const { return m_totalBufferSizeAllocated; }
ElemType* BufferPointer() const;
@@ -107,8 +116,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void SetValue(const GPUMatrix<ElemType>& denseMatrix);
void ResizeAsAndCopyIndexFrom(const GPUSparseMatrix<ElemType>& a, const bool growOnly = true);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZ);
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const MatrixFormat matrixFormat, const bool growOnly = true); //matrix format will affect the size to allocate
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve, const bool growOnly = true);
GPUSparseMatrix<ElemType> Transpose() const;
void InplaceTranspose();

View File

@@ -925,7 +925,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template<class ElemType>
void Matrix<ElemType>::SetValue(const size_t rIdx, const size_t cIdx, ElemType val)
{
DISPATCH_MATRIX_ON_FLAG(this,
DISPATCH_MATRIX_ON_FLAG_USECPU_4BOTH(this,
this,
(*m_CPUMatrix)(rIdx, cIdx) = val,
NOT_IMPLEMENTED,
@@ -1150,26 +1150,14 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType>
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, bool growOnly /*=true*/)
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve /*=0*/, bool growOnly /*=true*/)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->Resize(numRows,numCols,growOnly),
m_GPUMatrix->Resize(numRows,numCols,growOnly),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED
);
}
template<class ElemType>
void Matrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t allocatedSize)
{
DISPATCH_MATRIX_ON_FLAG(this,
this,
NOT_IMPLEMENTED,
NOT_IMPLEMENTED,
m_CPUSparseMatrix->Resize(numRows,numCols, allocatedSize),
m_GPUSparseMatrix->Resize(numRows,numCols, allocatedSize)
m_CPUSparseMatrix->Resize(numRows, numCols, numNZElemToReserve, growOnly),
m_GPUSparseMatrix->Resize(numRows, numCols, numNZElemToReserve, growOnly)
);
}
@@ -3069,11 +3057,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
{
delete m_CPUSparseMatrix;
m_CPUSparseMatrix = NULL;
SetDataLocation(GPU, DENSE);
SetDataLocation(GPU, SPARSE);
}
else
{
SetDataLocation(BOTH, DENSE);
SetDataLocation(BOTH, SPARSE);
}
}
else //from GPU

View File

@@ -112,8 +112,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
void RmsProp(Matrix<ElemType>& gradients, ElemType RMS_GAMMA, ElemType RMS_WGT_INC, ElemType RMS_WGT_MAX, ElemType RMS_WGT_DEC, ElemType RMS_WGT_MIN);
void Reshape(const size_t numRows, const size_t numCols);
void Resize(const size_t numRows, const size_t numCols, bool growOnly = true); //by default we only reallocate if need to grow
void Resize(const size_t numRows, const size_t numCols, const size_t allocatedSize); //for sparse matrix
void Resize(const size_t numRows, const size_t numCols, const size_t numNZElemToReserve = 0, bool growOnly = true); //by default we only reallocate if need to grow
size_t GetAllocatedSize() const;
void Reset(); //reset for sparse matrix

View File

@@ -73,7 +73,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
}
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZ, const MatrixFormat matrixFormat, const bool growOnly = true) {}//matrix format will affect the size to allocate
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZ) {}
template<class ElemType> void GPUSparseMatrix<ElemType>::Resize(const size_t numRows, const size_t numCols, const size_t numNZ, const bool growOnly = true) {}
template<class ElemType> GPUMatrix<ElemType> GPUSparseMatrix<ElemType>::CopyToDenseMatrix() const
{

File diff suppressed because it is too large. Load Diff

Binary file not shown.

View File

@@ -115,8 +115,8 @@ Jie Gao, Avner May, Baolin Peng, Andreas Stolcke, Malcolm Slaney
\end_layout
\begin_layout Date
MSR-TR-2014-112 (DRAFT v0.3: Nov.
23, 2014)
MSR-TR-2014-112 (DRAFT v0.4: Jan.
4, 2015)
\end_layout
\begin_layout Standard

View File

@@ -3265,7 +3265,7 @@ status open
\begin_layout Plain Layout
Delay(m, [delayTime=1, defaultPastValue=0.1])
Delay(rows, [cols], m, [delayTime=1, defaultPastValue=0.1])
\end_layout
\end_inset
@@ -3273,6 +3273,18 @@ Delay(m, [delayTime=1, defaultPastValue=0.1])
\end_layout
\begin_layout Itemize
rows - the number of rows in the delay node (and in the input matrix).
This parameter is needed because under some loopy conditions the dimensions
cannot be automatically inferred from the input matrix.
\end_layout
\begin_layout Itemize
cols - the number of columns in the delay node (and in the input matrix).
This parameter is optional since it will be set based on the minibatch
size during training and testing.
\end_layout
\begin_layout Itemize
m - input matrix to be delayed.
Each column is a sample.
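For illustration of the extended form above (the node name and the row dimension are invented for the example): pastH = Delay(512, h, delayTime=1, defaultPastValue=0.1) fixes the row dimension at 512 so it need not be inferred inside a loop, while cols is omitted and picked up from the minibatch size.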

View File

@@ -1638,8 +1638,21 @@ loadBestModel
\end_layout
\begin_layout Itemize
learnRateAdjustInterval: determine the frequency of applying the learning
rate adjustment check.
\emph on
learnRateAdjustInterval
\begin_inset Index idx
status open
\begin_layout Plain Layout
learnRateAdjustInterval
\end_layout
\end_inset
\emph default
: determine the frequency of applying the learning rate adjustment check.
Default is 1 epoch.
If this value is set to a value larger than 1 the learning rate adjustment
will be based on the average criterion computed from the last learnRateAdjustIn
@@ -1776,9 +1789,113 @@ gradUpdateType
: gradient update type.
Valid values are None (default, no special treatment to the gradient),
AdaGrad, and RmsProp.
When gradUpdateType equals to RmsProp, you can control the behavior of
the gradient update using following parameters:
\end_layout
\begin_deeper
\begin_layout Itemize
\emph on
rms_wgt_inc
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
rms_wgt_inc
\end_layout
\end_inset
: multiplicative increment of the learning rate scale.
Default is 1.2.
\end_layout
\begin_layout Itemize
\emph on
rms_wgt_dec
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
rms_wgt_dec
\end_layout
\end_inset
: multiplicative decrement of the learning rate scale.
Default is 0.75.
\end_layout
\begin_layout Itemize
\emph on
rms_wgt_max
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
rms_wgt_max
\end_layout
\end_inset
: maximum learning rate scale allowed.
A value closer to 1 makes the learning rate adjustment more stable but
slower.
Default is 10.
\end_layout
\begin_layout Itemize
\emph on
rms_wgt_min
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
rms_wgt_min
\end_layout
\end_inset
: minimum learning rate scale allowed.
A value closer to 1 makes the learning rate adjustment more stable but
slower.
Default is 0.1.
\end_layout
\begin_layout Itemize
\emph on
rms_gamma
\emph default
\begin_inset Index idx
status open
\begin_layout Plain Layout
rms_gamma
\end_layout
\end_inset
: smoothing factor used to estimate the moving average of the variance.
The smaller the value, the quicker it forgets the past information.
Default is 0.99.
\end_layout
\end_deeper
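Put together, a plausible SGD configuration fragment using these settings (the values shown are the documented defaults; the block syntax follows the usual CNTK configuration style) is:

SGD = [
    gradUpdateType = RmsProp
    rms_wgt_inc = 1.2
    rms_wgt_dec = 0.75
    rms_wgt_max = 10
    rms_wgt_min = 0.1
    rms_gamma = 0.99
]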
\begin_layout Itemize
\emph on
@@ -4366,6 +4483,60 @@ minibatchSize
– the minibatch size to use when creating the label mapping file.
\end_layout
\begin_layout Section
ConvertDBN Command
\begin_inset Index idx
status open
\begin_layout Plain Layout
ConvertDBN Command
\end_layout
\end_inset
\end_layout
\begin_layout Standard
This command is used to convert a model generated by Microsoft's dbn.exe
tool to a CNTK model.
It is useful when you want to compare the performance of the two tools
(dbn.exe only supports simple fully connected deep neural networks), port
existing models trained with dbn.exe to CNTK, or use the RBM pre-training
that is available in dbn.exe but not yet in CNTK.
The related parameters are
\end_layout
\begin_layout Itemize
modelPath
\begin_inset Index idx
status open
\begin_layout Plain Layout
modelPath
\end_layout
\end_inset
– the full path of the generated CNTK model.
\end_layout
\begin_layout Itemize
dbnModelPath
\begin_inset Index idx
status open
\begin_layout Plain Layout
dbnModelPath
\end_layout
\end_inset
– the full path of the model to be converted.
\end_layout
\begin_layout Section
Additional Top-Level Configurations
\end_layout

View File

@@ -116,10 +116,10 @@ At the center of the CNTK is the ComputationNetwork class, which manages
the life span of computation nodes comprising the network and all the functions
operating at the network level such as forward computations and gradient
calculations.
To build a computational network you need to use one of the ComputationNetBuild
er classes that implement the IComputationNetBuilder interface.
To build a computational network you need to use one of the computational
network builder classes that implement the IComputationNetBuilder interface.
These classes include SimpleNetworkBuilder that supports building simple
layer-by-layer fully connected networks,
layer-by-layer fully connected networks and
\begin_inset Index idx
status open
@@ -149,8 +149,7 @@ LSTM
\end_inset
) neural networks.
It also includes NDLNetworkBuilder that can build neural network, using
) RNNs, as well as NDLNetworkBuilder that builds neural networks, using
any computation node we have described in Section
\begin_inset CommandInset ref
LatexCommand ref
@@ -181,7 +180,7 @@ IDataReader
\end_inset
is an interface for loading data and its transcriptions.
Different data file format requires different data readers.
Different data file formats require different data readers.
CNTK already implements the UCIFastReader and the BinaryReader that reads
in UCI data in either text or binary format, the HTKMLFReader that reads
in HTK/MLF speech data, the SequenceReader that is designed for language

View File

@@ -159,7 +159,7 @@ key "Variable-Component-Deep-Neural-Network:2014"
Conventionally, one needs to design the network, derive the derivatives
needed to optimize the network, implement the algorithm, and then run the
experiments.
These steps are error pronoe and time consuming.
These steps are error prone and time consuming.
With CNTK, however in many cases, you only need to write a simple configuration
file.
The rest of this chapter describes the configuration file needed to implement
@@ -819,7 +819,7 @@ status open
\begin_layout Plain Layout
cn.exe config=Simple.config
cn.exe configFile=Simple.config
\end_layout
\end_inset
@@ -838,7 +838,7 @@ status open
\begin_layout Plain Layout
cn config=Simple.config
cn configFile=Simple.config
\end_layout
\end_inset