updating multiverso helper for the new matrix interface

This commit is contained in:
Qiwei Ye 2016-04-16 19:10:11 +08:00
Родитель 9760990506
Коммит 7323d7c519
6 изменённых файлов: 161 добавлений и 150 удалений

Просмотреть файл

@ -479,7 +479,6 @@ int wmainWithBS(int argc, wchar_t* argv[]) // called from wmain which is a wrapp
bool paralleltrain = config(L"parallelTrain", false);
if (paralleltrain)
mpi = MPIWrapper::GetInstance(true /*create*/);
}
g_shareNodeValueMatrices = config(L"shareNodeValueMatrices", false);

Просмотреть файл

@ -46,11 +46,11 @@ namespace Microsoft {
};
template<class ElemType = float>
class MultiversoWrapper
class MultiversoHelper
{
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
public:
MultiversoWrapper(const std::list<ComputationNodeBasePtr> & learnableNodes,
MultiversoHelper(const std::list<ComputationNodeBasePtr> & learnableNodes,
int MPINodeNum,
bool isAsyncBuffered = true,
AdjustLearningRateatBeginning adjusttype = AdjustLearningRateatBeginning::None,
@ -74,7 +74,8 @@ namespace Microsoft {
m_cpuAsyncBuffer = new ElemType*[m_localCacheNumber];
#ifndef CPUONLY
//GPU asynchronous buffer
m_gpuAsyncBuffer = new Matrix<ElemType>**[m_localCacheNumber];
//m_gpuAsyncBuffer = new Matrix<ElemType>**[m_localCacheNumber];
m_gpuAsyncBuffer.resize(m_localCacheNumber);
//create a communication stream for the data transfer between GPU and CPU
CudaErrorCheck(cudaStreamCreate(&_commStream));
@ -91,9 +92,9 @@ namespace Microsoft {
MultiversoInit(learnableNodes);
}
~MultiversoWrapper()
~MultiversoHelper()
{
fprintf(stderr, "~MultiversoWrapper\n");
fprintf(stderr, "~MultiversoHelper\n");
fflush(stderr);
if (m_isUseAsyncBuffered && m_prefetchThread != nullptr && m_prefetchThread->joinable())
@ -126,11 +127,17 @@ namespace Microsoft {
{
ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
Matrix<ElemType> &mat = node->Value();
printf("here!2\n");
fflush(stdout);
#pragma warning( push )
#pragma warning( disable : 4238)
#ifndef CPUONLY
for (int j = 0; j < m_localCacheNumber; j++)
m_gpuAsyncBuffer[j][i] = new Matrix<ElemType>(mat);
m_gpuAsyncBuffer[j].push_back(mat.DeepClone());
//m_gpuAsyncBuffer[j][i] = mat.DeepClone();
#endif
#pragma warning( pop )
ElemType* px = m_cpuAsyncBuffer[0] + m_tableOffsets[i];
mat.CopyToArray(px, m_tableLength[i]);
}
@ -178,14 +185,14 @@ namespace Microsoft {
Microsoft::MSR::CNTK::Matrix<ElemType> &mat = node->Value();
#ifndef CPUONLY
//CNTK model -> GPU buffer
CudaErrorCheck(cudaMemcpy(m_gpuAsyncBuffer[m_bufferInUse][i]->BufferPointer(),
mat.BufferPointer(),
CudaErrorCheck(cudaMemcpy(m_gpuAsyncBuffer[m_bufferInUse][i].Data(),
mat.Data(),
mat.GetNumElements() * sizeof(ElemType),
cudaMemcpyDeviceToDevice));
//GPU buffer -> CNTK model
CudaErrorCheck(cudaMemcpy(mat.BufferPointer(),
m_gpuAsyncBuffer[m_cacheSwapIndex[m_bufferInUse]][i]->BufferPointer(),
CudaErrorCheck(cudaMemcpy(mat.Data(),
m_gpuAsyncBuffer[m_cacheSwapIndex[m_bufferInUse]][i].Data(),
mat.GetNumElements() * sizeof(ElemType),
cudaMemcpyDeviceToDevice));
#else
@ -205,7 +212,7 @@ namespace Microsoft {
m_prefetchThread = new thread([&](){
float factor = DecayCoefficient();
int t_cacheIdx = m_bufferInUse;
int deviceId = m_gpuAsyncBuffer[t_cacheIdx][0]->GetDeviceId();
int deviceId = m_gpuAsyncBuffer[t_cacheIdx][0].GetDeviceId();
CudaErrorCheck(cudaSetDevice(deviceId));
@ -214,8 +221,8 @@ namespace Microsoft {
ElemType * px = m_deltaArray + m_tableOffsets[widx];
//GPU buffer -> CPU buffer
CudaErrorCheck(cudaMemcpyAsync(px,
m_gpuAsyncBuffer[t_cacheIdx][widx]->BufferPointer(),
m_gpuAsyncBuffer[t_cacheIdx][widx]->GetNumElements() * sizeof(ElemType),
m_gpuAsyncBuffer[t_cacheIdx][widx].Data(),
m_gpuAsyncBuffer[t_cacheIdx][widx].GetNumElements() * sizeof(ElemType),
cudaMemcpyDeviceToHost,
_commStream));
}
@ -242,9 +249,9 @@ namespace Microsoft {
{
ElemType * py = m_cpuAsyncBuffer[t_cacheIdx] + m_tableOffsets[widx];
CudaErrorCheck(cudaMemcpyAsync(m_gpuAsyncBuffer[t_cacheIdx][widx]->BufferPointer(),
CudaErrorCheck(cudaMemcpyAsync(m_gpuAsyncBuffer[t_cacheIdx][widx].Data(),
py,
m_gpuAsyncBuffer[t_cacheIdx][widx]->GetNumElements() * sizeof(ElemType),
m_gpuAsyncBuffer[t_cacheIdx][widx].GetNumElements() * sizeof(ElemType),
cudaMemcpyHostToDevice,
_commStream));
}
@ -376,8 +383,13 @@ namespace Microsoft {
}
#ifndef CPUONLY
printf("here!1\n");
fflush(stdout);
for (int i = 0; i < m_localCacheNumber; i++)
m_gpuAsyncBuffer[i] = new Matrix<ElemType>*[m_tableCount];
//m_gpuAsyncBuffer[i] = new Matrix<ElemType>*[m_tableCount];
m_gpuAsyncBuffer[i].reserve(m_tableCount);
printf("here!2\n");
fflush(stdout);
//create pinned memory
for (int i = 0; i < m_localCacheNumber; ++i)
@ -433,7 +445,8 @@ namespace Microsoft {
ElemType ** m_cpuAsyncBuffer;
//GPU double buffer
Matrix<ElemType> *** m_gpuAsyncBuffer;
//Matrix<ElemType> ** m_gpuAsyncBuffer;
std::vector<std::vector<Matrix<ElemType> >> m_gpuAsyncBuffer;
int m_tableCount;
#ifndef CPUONLY
cudaStream_t _commStream;

Просмотреть файл

@ -12,10 +12,10 @@ namespace Microsoft {
};
template<class ElemType = float>
class MultiversoWrapper
class MultiversoHelper
{
public:
MultiversoWrapper(const std::list<ComputationNodeBasePtr> & learnableNodes,
MultiversoHelper(const std::list<ComputationNodeBasePtr> & learnableNodes,
int localWorkerNumber,
bool isPipeline = true,
AdjustLearningRateatBeginning adjusttype = AdjustLearningRateatBeginning::None,
@ -25,7 +25,7 @@ namespace Microsoft {
}
~MultiversoWrapper()
~MultiversoHelper()
{
}

Просмотреть файл

@ -345,27 +345,27 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
m_seqGammarCalcAMF, m_seqGammarCalcLMF, m_seqGammarCalcWP, m_seqGammarCalcbMMIFactor, m_seqGammarCalcUsesMBR);
}
//Multiverso Warpper for ASGD logic init
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
m_multiverso = new MultiversoWrapper<ElemType>(learnableNodes,
g_mpi->NumNodesInUse(),
m_isPipeline,
m_adjustlearningrateatbeginning,
m_adjustcoefficient,
m_adjustnbminibatch,
m_traceLevel);
m_multiverso->InitModel(learnableNodes);
m_multiversoBarrier = false;
m_multiverso->WaitAll();
}
//Multiverso Wrapper for ASGD logic init
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
m_pMultiversoHelper = new MultiversoHelper<ElemType>(learnableNodes,
m_mpi->NumNodesInUse(),
m_isPipeline,
m_adjustlearningrateatbeginning,
m_adjustcoefficient,
m_adjustnbminibatch,
m_traceLevel);
m_pMultiversoHelper->InitModel(learnableNodes);
m_pMultiversoHelperBarrier = false;
m_pMultiversoHelper->WaitAll();
}
// --- MAIN EPOCH LOOP
for (int i = startEpoch; i < (int) m_maxEpochs; i++) // TODO: why is this an int, and not a size_t?
{
// Synchronize all ranks before proceeding to ensure that
// rank 0 has finished writing the previous model file
if (m_mpi != nullptr && GetParallelizationMethod() != ParallelizationMethod::DataParallelASGD)
if (m_mpi != nullptr && GetParallelizationMethod() != ParallelizationMethod::DataParallelASGD)
{
m_mpi->WaitAll();
}
@ -534,7 +534,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
if (validationSetDataReader != trainSetDataReader && validationSetDataReader != nullptr)
{
SimpleEvaluator<ElemType> evalforvalidation(net, m_mpi);
SimpleEvaluator<ElemType> evalforvalidation(net, m_mpi);
vector<wstring> cvSetTrainAndEvalNodes;
if (criterionNodes.size() > 0)
{
@ -547,7 +547,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
// BUGBUG: We should not use the training MB size. The training MB size is constrained by both convergence and memory. Eval is only constrained by memory.
vector<double> vScore = evalforvalidation.Evaluate(validationSetDataReader, cvSetTrainAndEvalNodes, m_mbSize[i]);
LOGPRINTF(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g", i + 1, (int) m_maxEpochs, vScore[0]);
LOGPRINTF(stderr, "Finished Epoch[%2d of %d]: [Validation Set] TrainLossPerSample = %.8g", i + 1, (int)m_maxEpochs, vScore[0]);
if (vScore.size() > 1)
{
fprintf(stderr, "; EvalErrPerSample = %.8g", vScore[1]);
@ -716,7 +716,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
// Synchronize all ranks before proceeding to ensure that
// rank 0 has finished writing the model file
// TODO[DataASGD]: the other ranks should wait here in async-mode
if (m_mpi != nullptr && GetParallazationMethod() != ParallelizationMethod::DataParallelASGD)
if (m_mpi != nullptr && GetParallelizationMethod() != ParallelizationMethod::DataParallelASGD)
{
m_mpi->WaitAll();
}
@ -738,7 +738,7 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
delete inputMatrices;
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
delete m_multiverso;
delete m_pMultiversoHelper;
}
}
@ -795,9 +795,9 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
(epochNumber >= m_parallelizationStartEpochNum));
bool useModelAveraging = ((GetParallelizationMethod() == ParallelizationMethod::ModelAveragingSGD) &&
(epochNumber >= m_parallelizationStartEpochNum));
bool useASGD = ((m_parallelizationMethod == ParallelizationMethod::DataParallelASGD) &&
(epochNumber >= m_parallelizationStartEpochNum));
bool useParallelTrain = useGradientAggregation || useModelAveraging || useASGD;
bool useASGD = ((GetParallelizationMethod() == ParallelizationMethod::DataParallelASGD) &&
(epochNumber >= m_parallelizationStartEpochNum));
bool useParallelTrain = useGradientAggregation || useModelAveraging || useASGD;
// MA-related variables
size_t nSamplesSinceLastModelSync = 0;
@ -1053,8 +1053,8 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
for (size_t i = 0; i < epochEvalErrors.size(); i++)
epochEvalErrors[i] += m_gradHeader->evalErrors[i];
}
computeTimer.Stop();
computeTime += computeTimer.ElapsedSeconds();
computeTimer.Stop();
computeTime += computeTimer.ElapsedSeconds();
// update model parameters
if ((aggregateNumSamples > 0) && (learnRatePerSample > m_minLearnRate * 0.01))
{
@ -1091,38 +1091,37 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
bool synced = m_pMASGDHelper->OnArrivingAtSyncPoint(learnableNodes, smoothedGradients, nSamplesSinceLastModelSync);
if (synced)
{
nSamplesSinceLastModelSync = 0;
}
}
nSamplesSinceLastModelSync = 0;
}
}
// prepare break condition
if (useDistributedMBReading)
{
noMoreSamplesToProcess = !wasDataRead;
}
noMoreSamplesToProcess = !wasDataRead;
}
}
// using parameter server for parameter update
if (useASGD && m_mpi->NumNodesInUse() > 1)
{
if (GetParallelizationMethod() == ParallelizationMethod::DataParallelASGD && m_nEpochBarrier[epochNumber] > 0 && epochNumber % m_nEpochBarrier[epochNumber] == 0)
{
// simulating BSP
m_pMultiversoHelper->WaitAsyncBuffer();
m_pMultiversoHelper->WaitAll();
}
if (useASGD && g_mpi->NumNodesInUse() > 1)
{
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD && m_nEpochBarrier[epochNumber] > 0 && epochNumber % m_nEpochBarrier[epochNumber] == 0)
{
m_multiverso->WaitAsyncBuffer();
m_multiverso->WaitAll();
}
// Determine if any samples were processed across any of the ranks
if (useDistributedMBReading)
{
noMoreSamplesToProcess = !wasDataRead;
}
// Determine if any samples were processed across any of the ranks
if (useDistributedMBReading)
{
noMoreSamplesToProcess = !wasDataRead;
}
size_t processedSamples = 0;
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync[epochNumber])
{
m_multiverso->PushAndPullModel(learnableNodes);
processedSamples = nSamplesSinceLastModelSync;
nSamplesSinceLastModelSync = 0;
}
aggregateNumSamplesWithLabel = processedSamples;
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync[epochNumber])
{
m_pMultiversoHelper->PushAndPullModel(learnableNodes);
nSamplesSinceLastModelSync = 0;
}
}
commTimer.Stop();
@ -1136,81 +1135,81 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
if (
#if 0 // output the first few to see if everything started right
numMBsRun <= 3 ||
numMBsRun <= 3 ||
#endif
numMBsRun % m_numMBsToShowResult == 0)
numMBsRun % m_numMBsToShowResult == 0)
{
// get the epoch Values updated
if (!useGradientAggregation)
{
timer.Restart();
epochCriterion = localEpochCriterion.Get00Element();
for (size_t i = 0; i < epochEvalErrors.size(); i++)
{
epochEvalErrors[i] = localEpochEvalErrors(0, i);
}
timer.Stop();
// Add the last trailing compute
totalTimeInMBs += timer.ElapsedSeconds();
}
double trainLossPerSample = (numSamplesLastMBs != 0) ? ((epochCriterion - epochCriterionLastMBs) / numSamplesLastMBs) : 0.0;
bool wasProgressPrinted = false;
if (epochNumber > 0 || (int) epochSize > 0)
{
// progress tracing for compute cluster management
double mbProg = 0.0;
int mbProgNumPrecision = 2;
if (m_maxComputedEpochSize != 0)
{
double numMBPerEpoch = (double) m_maxComputedEpochSize / (double) tunedMBSize;
mbProg = (double) numMBsRun / numMBPerEpoch;
mbProgNumPrecision = (int) ceil(log10(numMBPerEpoch / (double) m_numMBsToShowResult));
mbProgNumPrecision = max(mbProgNumPrecision - 2, 2);
}
wasProgressPrinted = ProgressTracing::TraceProgressPercentage(epochNumber, mbProg, false);
// progress tracing for regular log
string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d, %2." + std::to_string(mbProgNumPrecision) + "f%%]: SamplesSeen = %d; TrainLossPerSample = " +
GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; ";
SGDTrace(stderr, true, formatString.c_str(),
prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1,
numMBsRun, mbProg * 100, numSamplesLastMBs, trainLossPerSample);
}
else
{
wasProgressPrinted = ProgressTracing::TraceProgressPercentage(epochNumber, 0.0, false);
string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d]: SamplesSeen = %d; TrainLossPerSample = " +
GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; ";
SGDTrace(stderr, true, formatString.c_str(),
prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1,
numMBsRun, numSamplesLastMBs, trainLossPerSample);
m_maxComputedEpochSize = numMBsRun * numSamplesLastMBs / m_numMBsToShowResult;
}
double evalError = 0.0;
// get the epoch Values updated
if (!useGradientAggregation)
{
timer.Restart();
epochCriterion = localEpochCriterion.Get00Element();
for (size_t i = 0; i < epochEvalErrors.size(); i++)
{
evalError = (epochEvalErrors[i] - epochEvalErrorsLastMBs[i]) / numSamplesLastMBs;
string formatString = "EvalErr[%lu]PerSample = " + GeneratePaddedFloatOrExpFormat(0, 8, evalError) + "; ";
SGDTrace(stderr, false, formatString.c_str(), i, evalError);
epochEvalErrors[i] = localEpochEvalErrors(0, i);
}
timer.Stop();
string formatString = "TotalTime = " + GeneratePaddedFloatOrExpFormat(0, 4, totalTimeInMBs) + "s; SamplesPerSecond = %.1f\n";
SGDTrace(stderr, false, formatString.c_str(), totalTimeInMBs, numSamplesLastMBs / totalTimeInMBs);
// Add the last trailing compute
totalTimeInMBs += timer.ElapsedSeconds();
}
string statcis_formatString = "; ReadTime = " + GeneratePaddedFloatOrExpFormat(0, 5, readTime) + "s; ComputeTime = " +
GeneratePaddedFloatOrExpFormat(0, 5, computeTime) + "s; CommunicationTime = " +
GeneratePaddedFloatOrExpFormat(0, 5, commTime) + "s;\n";
SGDTrace(stderr, false, statcis_formatString.c_str(), readTime, computeTime, commTime);
double trainLossPerSample = (numSamplesLastMBs != 0) ? ((epochCriterion - epochCriterionLastMBs) / numSamplesLastMBs) : 0.0;
bool wasProgressPrinted = false;
if (epochNumber > 0 || (int)epochSize > 0)
{
// progress tracing for compute cluster management
if (wasProgressPrinted)
double mbProg = 0.0;
int mbProgNumPrecision = 2;
if (m_maxComputedEpochSize != 0)
{
ProgressTracing::TraceTrainLoss(trainLossPerSample);
double numMBPerEpoch = (double)m_maxComputedEpochSize / (double)tunedMBSize;
mbProg = (double)numMBsRun / numMBPerEpoch;
mbProgNumPrecision = (int)ceil(log10(numMBPerEpoch / (double)m_numMBsToShowResult));
mbProgNumPrecision = max(mbProgNumPrecision - 2, 2);
}
wasProgressPrinted = ProgressTracing::TraceProgressPercentage(epochNumber, mbProg, false);
// progress tracing for regular log
string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d, %2." + std::to_string(mbProgNumPrecision) + "f%%]: SamplesSeen = %d; TrainLossPerSample = " +
GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; ";
SGDTrace(stderr, true, formatString.c_str(),
prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1,
numMBsRun, mbProg * 100, numSamplesLastMBs, trainLossPerSample);
}
else
{
wasProgressPrinted = ProgressTracing::TraceProgressPercentage(epochNumber, 0.0, false);
string formatString = "%s Epoch[%2d of %d]-Minibatch[%4d-%4d]: SamplesSeen = %d; TrainLossPerSample = " +
GeneratePaddedFloatOrExpFormat(11, 8, trainLossPerSample) + "; ";
SGDTrace(stderr, true, formatString.c_str(),
prefixMsg.c_str(), epochNumber + 1, m_maxEpochs, numMBsRun - m_numMBsToShowResult + 1,
numMBsRun, numSamplesLastMBs, trainLossPerSample);
m_maxComputedEpochSize = numMBsRun * numSamplesLastMBs / m_numMBsToShowResult;
}
double evalError = 0.0;
for (size_t i = 0; i < epochEvalErrors.size(); i++)
{
evalError = (epochEvalErrors[i] - epochEvalErrorsLastMBs[i]) / numSamplesLastMBs;
string formatString = "EvalErr[%lu]PerSample = " + GeneratePaddedFloatOrExpFormat(0, 8, evalError) + "; ";
SGDTrace(stderr, false, formatString.c_str(), i, evalError);
}
string formatString = "TotalTime = " + GeneratePaddedFloatOrExpFormat(0, 4, totalTimeInMBs) + "s; SamplesPerSecond = %.1f\n";
SGDTrace(stderr, false, formatString.c_str(), totalTimeInMBs, numSamplesLastMBs / totalTimeInMBs);
string statcis_formatString = "; ReadTime = " + GeneratePaddedFloatOrExpFormat(0, 5, readTime) + "s; ComputeTime = " +
GeneratePaddedFloatOrExpFormat(0, 5, computeTime) + "s; CommunicationTime = " +
GeneratePaddedFloatOrExpFormat(0, 5, commTime) + "s;\n";
SGDTrace(stderr, false, statcis_formatString.c_str(), readTime, computeTime, commTime);
// progress tracing for compute cluster management
if (wasProgressPrinted)
{
ProgressTracing::TraceTrainLoss(trainLossPerSample);
}
if (m_traceLevel > 0)
{
@ -1236,7 +1235,7 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
timer.Restart();
totalEpochSamples += aggregateNumSamplesWithLabel;
if (!useModelAveraging && !useDataASGD)
if (!useModelAveraging && !useASGD)
totalSamplesSeen += aggregateNumSamplesWithLabel;
readTimer.Restart();
@ -1263,15 +1262,15 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
nSamplesSinceLastModelSync = 0;
}
if (useASGD && (g_mpi->NumNodesInUse() > 1))
{
// ASGD also may not be synced after epoch finished, so do the sync here
int residualSampels = (int)nSamplesSinceLastModelSync;
totalSamplesSeen += residualSampels;
totalEpochSamples += residualSampels;
m_multiverso->PushAndPullModel(learnableNodes);
nSamplesSinceLastModelSync = 0;
}
if (useASGD && (m_mpi->NumNodesInUse() > 1))
{
// ASGD also may not have synced after the epoch finished, so do the sync here
int residualSampels = (int)nSamplesSinceLastModelSync;
totalSamplesSeen += residualSampels;
totalEpochSamples += residualSampels;
m_pMultiversoHelper->PushAndPullModel(learnableNodes);
nSamplesSinceLastModelSync = 0;
}
// compute final criterion values
if (useGradientAggregation)

Просмотреть файл

@ -555,8 +555,8 @@ protected:
private:
int SGDTrace(FILE* __restrict __stream, bool isPrependTimestamp, const char* __restrict __format, ...);
MultiversoWrapper<ElemType>* m_multiverso;
bool m_multiversoBarrier;
MultiversoHelper<ElemType>* m_pMultiversoHelper;
bool m_pMultiversoHelperBarrier;
};
}}}

Просмотреть файл

@ -67,7 +67,7 @@
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(CNTK_ENABLE_ASGD)'=='true'">
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\multiverso;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(SolutionDir)Source\multiverso\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">