1. Update the multiverso submodule to the latest version;

2. Change the ASGD config options to use argvector, to offer more flexibility (per-epoch values);
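For context on item 2, a minimal, self-contained C++ sketch (not part of this commit) of how an argvector-style per-epoch option behaves. The class name ArgVectorSketch and the repeat-last-value fallback are illustrative assumptions modeled on how CNTK's other per-epoch options (e.g. learningRatesPerMB) are specified, not the actual intargvector implementation.

#include <cstddef>
#include <iostream>
#include <utility>
#include <vector>

// ArgVectorSketch is a hypothetical stand-in for an intargvector-like type.
template <typename T>
class ArgVectorSketch
{
public:
    explicit ArgVectorSketch(std::vector<T> values) : m_values(std::move(values)) {}

    // Assumption: reading past the end repeats the last entry, so a single
    // value such as "1280" covers every epoch, while "1280:2560" gives
    // epoch 0 the first value and all later epochs the second.
    T operator[](std::size_t epoch) const
    {
        return epoch < m_values.size() ? m_values[epoch] : m_values.back();
    }

private:
    std::vector<T> m_values;
};

int main()
{
    ArgVectorSketch<int> syncFrequencyInFrames({1280, 2560});
    std::cout << syncFrequencyInFrames[0] << "\n"; // 1280 (epoch 0)
    std::cout << syncFrequencyInFrames[5] << "\n"; // 2560 (later epochs reuse the last value)
    return 0;
}

With this, settings such as SyncFrequencyInFrames and EpochBarrier can take either a single value or one value per epoch, which is the flexibility this commit adds.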
This commit is contained in:
Qiwei Ye 2016-02-25 14:40:12 +08:00
Parent 956e1bc112
Commit fb04c505d1
4 changed files with 128 additions and 151 deletions

View file

@@ -51,7 +51,7 @@ namespace Microsoft {
public:
MultiversoWrapper(const std::list<ComputationNodeBasePtr> & learnableNodes,
int MPINodeNum,
bool isPipeline = true,
bool isAsyncBuffered = true,
AdjustLearningRateatBeginning adjusttype = AdjustLearningRateatBeginning::None,
double adjustcoef = 0.2,
size_t adjustnbmb = 600)
@@ -64,29 +64,25 @@ namespace Microsoft {
m_totalClientNumber = MPINodeNum;
//Pipeline related variables
m_isPipelined = isPipeline;
m_localCacheNumber = m_isPipelined ? 2 : 1;
m_isUseAsyncBuffered = isAsyncBuffered;
m_localCacheNumber = m_isUseAsyncBuffered ? 2 : 1;
m_cacheSwapIndex = new int[m_localCacheNumber];
//CPU double buffer
//CPU asynchronous buffer
m_cpuAsyncBuffer = new ElemType*[m_localCacheNumber];
#ifndef CPUONLY
//GPU double buffer
//GPU asynchronous buffer
m_gpuAsyncBuffer = new Matrix<ElemType>**[m_localCacheNumber];
//Communication Stream
//create a communication stream for the data transfer between GPU and CPU
CudaErrorCheck(cudaStreamCreate(&_commStream));
#endif
m_cacheIndex = 0;
m_bufferInUse = 0;
for (int i = 0; i < m_localCacheNumber; i++)
m_cacheSwapIndex[i] = (i + 1) % m_localCacheNumber;
m_prefetchThread = new thread();
m_prefetchThread = nullptr;
m_modelSizeOfEachServer = new size_t[m_totalClientNumber];
m_indexOfEachServer = new size_t[m_totalClientNumber];
MultiversoInit(learnableNodes);
}
@@ -95,10 +91,10 @@ namespace Microsoft {
fprintf(stderr, "~MultiversoWrapper\n");
fflush(stderr);
if (m_isPipelined && m_prefetchThread != nullptr && m_prefetchThread->joinable())
if (m_isUseAsyncBuffered && m_prefetchThread != nullptr && m_prefetchThread->joinable())
m_prefetchThread->join();
delete m_cacheSwapIndex, m_deltaArray, m_modelSizeOfEachServer, m_indexOfEachServer;
delete m_cacheSwapIndex, m_deltaArray;
for (size_t i = 0; i < m_localCacheNumber; i++)
{
@@ -115,13 +111,12 @@ namespace Microsoft {
multiverso::MultiversoShutDown(false);
}
// This function will upload parameters into Multiverso
// upload the pre-computed model to the parameter servers
void InitModel(const std::list<ComputationNodeBasePtr> & learnableNodes)
{
float factor = (float) 1.0 / m_totalClientNumber;
//weights
int i = 0;
int i = 0; // index of the current learnable node
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++, i++)
{
ComputationNodePtr node = dynamic_pointer_cast<ComputationNode<ElemType>>(*nodeIter);
@@ -159,10 +154,10 @@ namespace Microsoft {
Timer timer;
WaitAsyncBuffer();
m_cacheIndex = m_cacheSwapIndex[m_cacheIndex];
m_bufferInUse = m_cacheSwapIndex[m_bufferInUse];
int i = 0;
if (m_isPipelined)
int i = 0; // index of the current learnable node
if (m_isUseAsyncBuffered)
{
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++, i++)
@@ -171,22 +166,22 @@ namespace Microsoft {
Microsoft::MSR::CNTK::Matrix<ElemType> &mat = node->Value();
#ifndef CPUONLY
//CNTK model -> GPU buffer
CudaErrorCheck(cudaMemcpy(m_gpuAsyncBuffer[m_cacheIndex][i]->BufferPointer(),
CudaErrorCheck(cudaMemcpy(m_gpuAsyncBuffer[m_bufferInUse][i]->BufferPointer(),
mat.BufferPointer(),
mat.GetNumElements() * sizeof(ElemType),
cudaMemcpyDeviceToDevice));
//GPU buffer -> CNTK model
CudaErrorCheck(cudaMemcpy(mat.BufferPointer(),
m_gpuAsyncBuffer[m_cacheSwapIndex[m_cacheIndex]][i]->BufferPointer(),
m_gpuAsyncBuffer[m_cacheSwapIndex[m_bufferInUse]][i]->BufferPointer(),
mat.GetNumElements() * sizeof(ElemType),
cudaMemcpyDeviceToDevice));
#else
ElemType * px = m_cpuAsyncBuffer[m_cacheIndex] + m_tableIndex[i];
ElemType * px = m_cpuAsyncBuffer[m_bufferInUse] + m_tableIndex[i];
mat.CopyToArray(px, m_tableLength[i]);
ElemType * py = m_cpuAsyncBuffer[m_cacheSwapIndex[m_cacheIndex]] + m_tableIndex[i];
ElemType * py = m_cpuAsyncBuffer[m_cacheSwapIndex[m_bufferInUse]] + m_tableIndex[i];
mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), mat.GetDeviceId(), py);
@@ -197,7 +192,7 @@ namespace Microsoft {
#ifndef CPUONLY
m_prefetchThread = new thread([&](){
float factor = DecayCoefficient();
int t_cacheIdx = m_cacheIndex;
int t_cacheIdx = m_bufferInUse;
int deviceId = m_gpuAsyncBuffer[t_cacheIdx][0]->GetDeviceId();
CudaErrorCheck(cudaSetDevice(deviceId));
@@ -213,10 +208,10 @@ namespace Microsoft {
_commStream));
}
//Sync for copy
// wait for the copy from GPU to CPU to finish
CudaErrorCheck(cudaStreamSynchronize(_commStream));
//Calculate delta
// calculate delta
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_cpuAsyncBuffer[t_cacheIdx], m_deltaArray, std::minus<ElemType>());
// lr decay
@@ -224,9 +219,8 @@ namespace Microsoft {
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
m_sharedArray->Get(m_cpuAsyncBuffer[t_cacheIdx], m_totalModelSize);
//memcpy(m_cpuAsyncBuffer[t_cacheIdx], m_sharedArray->raw().data(), m_totalModelSize);
//CPU buffer -> GPU buffer
// copy parameters from CPU buffer to GPU buffer
for (int widx = 0; widx < m_tableCount; widx++)
{
ElemType * py = m_cpuAsyncBuffer[t_cacheIdx] + m_tableIndex[widx];
@@ -244,13 +238,12 @@ namespace Microsoft {
#else
m_prefetchThread = new thread([&](){
float factor = DecayCoefficient();
int t_cacheIdx = m_cacheIndex;
int t_cacheIdx = m_bufferInUse;
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_cpuAsyncBuffer[t_cacheIdx], m_deltaArray, std::minus<ElemType>());
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray, std::bind1st(std::multiplies<ElemType>(), factor));
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
m_sharedArray->Get(m_cpuAsyncBuffer[t_cacheIdx], m_totalModelSize);
//memcpy(m_cpuAsyncBuffer[t_cacheIdx], m_sharedArray->raw().data(), m_totalModelSize);
});
#endif
@@ -275,7 +268,6 @@ namespace Microsoft {
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
m_sharedArray->Get(m_cpuAsyncBuffer[0], m_totalModelSize);
//memcpy(m_cpuAsyncBuffer[0], m_sharedArray->raw().data(), m_totalModelSize);
i = 0;
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++, i++)
@@ -284,7 +276,6 @@ namespace Microsoft {
Microsoft::MSR::CNTK::Matrix<ElemType> &mat = node->Value();
ElemType * px = m_cpuAsyncBuffer[0] + m_tableIndex[i];
mat.SetValue(mat.GetNumRows(), mat.GetNumCols(), mat.GetDeviceId(), px);
}
}
@@ -311,7 +302,7 @@ namespace Microsoft {
{
m_prefetchThread->join();
delete m_prefetchThread;
m_prefetchThread == nullptr;
m_prefetchThread = nullptr;
}
}
private:
@@ -320,8 +311,8 @@ namespace Microsoft {
assert(!m_isInitialized);
m_isInitialized = true;
multiverso::MultiversoInit();
//multiverso::Log::ResetLogLevel(multiverso::LogLevel::Debug);
multiverso::MultiversoInit();
//weights
for (auto nodeIter = learnableNodes.begin(); nodeIter != learnableNodes.end(); nodeIter++)
@@ -335,22 +326,15 @@ namespace Microsoft {
m_tableCount = m_tableLength.size();
//init cache space.
// calculate the total size of all learnable nodes
m_totalModelSize = accumulate(m_tableLength.begin(), m_tableLength.end(), 0);
size_t idx = 0;
//for (int i = 0; i < m_totalClientNumber; i++)
//{
// m_indexOfEachServer[i] = idx;
// m_modelSizeOfEachServer[i] = i < m_totalModelSize % m_totalClientNumber ? m_totalModelSize / m_totalClientNumber + 1 : m_totalModelSize / m_totalClientNumber;
// idx += m_modelSizeOfEachServer[i];
//}
m_sharedArray = new multiverso::ArrayWorker<ElemType>(m_totalModelSize);
m_serverArray = new multiverso::ArrayServer<ElemType>(m_totalModelSize);
multiverso::MultiversoBarrier();
idx = 0;
size_t idx = 0;
for (size_t len : m_tableLength)
{
m_tableIndex.push_back(idx);
@@ -358,18 +342,17 @@ namespace Microsoft {
}
#ifndef CPUONLY
//pinned memory
for (int i = 0; i < m_localCacheNumber; ++i)
CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
//GPU memory cache
for (int i = 0; i < m_localCacheNumber; i++)
m_gpuAsyncBuffer[i] = new Matrix<ElemType>*[m_tableCount];
//create pinned memory
for (int i = 0; i < m_localCacheNumber; ++i)
CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
#else
for (int i = 0; i < m_localCacheNumber; i++)
m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize + 1];
m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize];
#endif
}
@@ -398,10 +381,10 @@ namespace Microsoft {
int m_totalClientNumber;
bool m_isPipelined;
bool m_isUseAsyncBuffered;
int m_localCacheNumber;
int * m_cacheSwapIndex;
int m_cacheIndex;
int m_bufferInUse;
size_t m_modelSyncCount;
@@ -415,10 +398,6 @@ namespace Microsoft {
ElemType * m_deltaArray;
ElemType ** m_cpuAsyncBuffer;
// TODO: deprecate these unused variables
size_t * m_modelSizeOfEachServer;
size_t * m_indexOfEachServer;
//GPU double buffer
Matrix<ElemType> *** m_gpuAsyncBuffer;
int m_tableCount;
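Aside (not part of the diff): the swap-index rotation above (m_cacheSwapIndex, m_bufferInUse) alternates between two local buffers so that training and parameter-server communication can overlap. Below is a minimal standalone C++ sketch of that rotation, with illustrative variable names only.

#include <iostream>

int main()
{
    // Two local buffers when async buffering is enabled, one otherwise.
    const int localCacheNumber = 2;
    int cacheSwapIndex[localCacheNumber];
    for (int i = 0; i < localCacheNumber; i++)
        cacheSwapIndex[i] = (i + 1) % localCacheNumber; // 0 -> 1, 1 -> 0

    int bufferInUse = 0;
    for (int syncRound = 0; syncRound < 4; syncRound++)
    {
        // Training works on bufferInUse while the background prefetch
        // thread exchanges the other buffer with the parameter server.
        std::cout << "round " << syncRound << ": using buffer " << bufferInUse << "\n";
        bufferInUse = cacheSwapIndex[bufferInUse]; // alternates 0, 1, 0, 1, ...
    }
    return 0;
}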

@@ -1 +1 @@
Subproject commit 50c45ccecf05a78366285e82b1a2a4b3431954e5
Subproject commit d7247ce844a322022883581c025229585fee04c6

View file

@@ -319,19 +319,20 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
m_seqGammarCalcAMF, m_seqGammarCalcLMF, m_seqGammarCalcWP, m_seqGammarCalcbMMIFactor, m_seqGammarCalcUsesMBR);
}
//Multiverso Wrapper for ASGD logic init
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
m_multiverso = new MultiversoWrapper<ElemType>(learnableNodes,
g_mpi->NumNodesInUse(),
m_isPipeline,
m_adjustlearningrateatbeginning,
m_adjustcoefficient,
m_adjustnbminibatch);
m_multiverso->InitModel(learnableNodes);
m_multiversoBarrier = false;
m_multiverso->WaitAll();
}
//Multiverso Wrapper for ASGD logic init
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
g_mpi->WaitAll();
m_multiverso = new MultiversoWrapper<ElemType>(learnableNodes,
g_mpi->NumNodesInUse(),
m_isPipeline,
m_adjustlearningrateatbeginning,
m_adjustcoefficient,
m_adjustnbminibatch);
m_multiverso->InitModel(learnableNodes);
m_multiversoBarrier = false;
m_multiverso->WaitAll();
}
// --- MAIN EPOCH LOOP
for (int i = startEpoch; i < (int) m_maxEpochs; i++) // TODO: why is this an int, and not a size_t?
@@ -343,9 +344,9 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
g_mpi->WaitAll();
}
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD && m_nEpochBarrier > 0 && i % m_nEpochBarrier == 0)
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD && m_nEpochBarrier[i] > 0 && i % m_nEpochBarrier[i] == 0)
{
m_multiverso->WaitAsyncBuffer(); // [Review:qiwye] does
m_multiverso->WaitAsyncBuffer();
m_multiverso->WaitAll();
}
@@ -701,11 +702,11 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
}
}
delete inputMatrices;
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
delete m_multiverso;
}
delete inputMatrices;
if (m_parallelizationMethod == ParallelizationMethod::DataParallelASGD)
{
delete m_multiverso;
}
}
// -----------------------------------------------------------------------
@@ -759,9 +760,9 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
(epochNumber >= m_parallelizationStartEpochNum));
bool useModelAveraging = ((m_parallelizationMethod == ParallelizationMethod::ModelAveragingSGD) &&
(epochNumber >= m_parallelizationStartEpochNum));
bool useASGD = ((m_parallelizationMethod == ParallelizationMethod::DataParallelASGD) &&
bool useASGD = ((m_parallelizationMethod == ParallelizationMethod::DataParallelASGD) &&
(epochNumber >= m_parallelizationStartEpochNum));
bool useParallelTrain = useGradientAggregation || useModelAveraging || useASGD;
bool useParallelTrain = useGradientAggregation || useModelAveraging || useASGD;
// MA-related variables
size_t nSamplesSinceLastModelSync = 0;
@@ -1097,28 +1098,28 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
}
aggregateNumSamplesWithLabel = processedSamples;
}
}
}
if (useASGD && g_mpi->NumNodesInUse() > 1)
if (useASGD && g_mpi->NumNodesInUse() > 1)
{
// Determine if any samples were processed across any of the ranks
if (useDistributedMBReading)
{
// Determine if any samples were processed across any of the ranks
if (useDistributedMBReading)
{
noMoreSamplesToProcess = !wasDataRead;
}
size_t processedSamples = 0;
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync)
{
m_multiverso->PushAndPullModel(learnableNodes);
processedSamples = nSamplesSinceLastModelSync;
nSamplesSinceLastModelSync = 0;
}
aggregateNumSamplesWithLabel = processedSamples;
noMoreSamplesToProcess = !wasDataRead;
}
commTimer.Stop();
commTime += commTimer.ElapsedSeconds();
size_t processedSamples = 0;
if (nSamplesSinceLastModelSync >= m_nFramesBetweenASGDSync[epochNumber])
{
m_multiverso->PushAndPullModel(learnableNodes);
processedSamples = nSamplesSinceLastModelSync;
nSamplesSinceLastModelSync = 0;
}
aggregateNumSamplesWithLabel = processedSamples;
}
commTimer.Stop();
commTime += commTimer.ElapsedSeconds();
timer.Stop();
numMBsRun++;
@@ -1256,15 +1257,15 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
nSamplesSinceLastModelSync = 0;
}
if (useASGD && (g_mpi->NumNodesInUse() > 1))
{
// ASGD may also not be synced when the epoch finishes, so do the sync here
int residualSampels = (int)nSamplesSinceLastModelSync;
totalSamplesSeen += residualSampels;
totalEpochSamples += residualSampels;
m_multiverso->PushAndPullModel(learnableNodes);
nSamplesSinceLastModelSync = 0;
}
if (useASGD && (g_mpi->NumNodesInUse() > 1))
{
// ASGD may also not be synced when the epoch finishes, so do the sync here
int residualSampels = (int)nSamplesSinceLastModelSync;
totalSamplesSeen += residualSampels;
totalEpochSamples += residualSampels;
m_multiverso->PushAndPullModel(learnableNodes);
nSamplesSinceLastModelSync = 0;
}
// compute final criterion values
if (useGradientAggregation)
@@ -2697,53 +2698,50 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_parallelizationStartEpochNum = 0;
m_nFramesBetweenMASync = 40000; // default 40k frames
m_nFramesBetweenASGDSync = 1280;
m_numMBsToASGDPushAndPull = 0;
m_nEpochBarrier = 0;
m_adjustlearningrateatbeginning = AdjustLearningRateatBeginning::None;
if ((g_mpi != nullptr) && configSGD.Exists(L"ParallelTrain"))
{
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
m_parallelizationStartEpochNum = configParallelTrain(L"parallelizationStartEpoch", (int) 1) - 1; // Epoch numbers internally are 0 based
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
m_syncStatsTrace = configParallelTrain(L"syncPerfStats", (int) 0);
const ConfigRecordType& configParallelTrain(configSGD(L"ParallelTrain", ConfigRecordType::Record()));
m_parallelizationMethod = ParseParallelizationMethod(configParallelTrain(L"parallelizationMethod", L"none"));
m_parallelizationStartEpochNum = configParallelTrain(L"parallelizationStartEpoch", (int)1) - 1; // Epoch numbers internally are 0 based
m_enableDistributedMBReading = configParallelTrain(L"distributedMBReading", false);
m_syncStatsTrace = configParallelTrain(L"syncPerfStats", (int)0);
if (configParallelTrain.Exists(L"DataParallelSGD"))
{
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
size_t defaultGradientBits = 8 * sizeofElemType;
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
if ((m_numGradientBits < 1) || (m_numGradientBits > (8 * sizeofElemType)))
{
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
}
}
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
{
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
m_nFramesBetweenMASync = configMASGD(L"syncFrequencyInFrames", (size_t) 40000);
}
if (configParallelTrain.Exists(L"DataParallelASGD"))
if (configParallelTrain.Exists(L"DataParallelSGD"))
{
const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
size_t defaultGradientBits = 8 * sizeofElemType;
m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
if ((m_numGradientBits < 1) || (m_numGradientBits >(8 * sizeofElemType)))
{
const ConfigRecordType & configDataParallelASGD(configParallelTrain(L"DataParallelASGD", ConfigRecordType::Record()));
m_nFramesBetweenASGDSync = configDataParallelASGD(L"SyncFrequencyInFrames", (size_t)1280);
m_isPipeline = configDataParallelASGD(L"UsePipeline", true);
m_nEpochBarrier = configDataParallelASGD(L"EpochBarrier", (size_t)0);
if (configDataParallelASGD.Exists(L"AdjustLearningRateAtBeginning"))
{
const ConfigRecordType & configAdjustLearningRateAtBeginning(configDataParallelASGD(L"AdjustLearningRateAtBeginning", ConfigRecordType::Record()));
m_adjustlearningrateatbeginning = AdjustLearningRateAtBeginningType(configAdjustLearningRateAtBeginning(L"adjustType", L"None"));
m_adjustcoefficient = configAdjustLearningRateAtBeginning(L"adjustCoefficient", (double)0.2);
m_adjustnbminibatch = configAdjustLearningRateAtBeginning(L"adjustNbMinibatch", (size_t)600);
}
InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
}
}
if (configParallelTrain.Exists(L"ModelAveragingSGD"))
{
const ConfigRecordType& configMASGD(configParallelTrain(L"ModelAveragingSGD", ConfigRecordType::Record()));
m_nFramesBetweenMASync = configMASGD(L"syncFrequencyInFrames", (size_t)40000);
}
if (configParallelTrain.Exists(L"DataParallelASGD"))
{
const ConfigRecordType & configDataParallelASGD(configParallelTrain(L"DataParallelASGD", ConfigRecordType::Record()));
m_nFramesBetweenASGDSync = configDataParallelASGD(L"SyncFrequencyInFrames", ConfigRecordType::Array(intargvector(vector<int>{1280})));
m_isPipeline = configDataParallelASGD(L"UsePipeline", true);
m_nEpochBarrier = configDataParallelASGD(L"EpochBarrier", ConfigRecordType::Array(intargvector(vector<int>{0})));
if (configDataParallelASGD.Exists(L"AdjustLearningRateAtBeginning"))
{
const ConfigRecordType & configAdjustLearningRateAtBeginning(configDataParallelASGD(L"AdjustLearningRateAtBeginning", ConfigRecordType::Record()));
m_adjustlearningrateatbeginning = AdjustLearningRateAtBeginningType(configAdjustLearningRateAtBeginning(L"adjustType", L"None"));
m_adjustcoefficient = configAdjustLearningRateAtBeginning(L"adjustCoefficient", (double)0.2);
m_adjustnbminibatch = configAdjustLearningRateAtBeginning(L"adjustNbMinibatch", (size_t)600);
}
}
}
}

View file

@@ -249,11 +249,11 @@ protected:
double m_L1RegWeight;
// Parallel training related with ASGD
size_t m_numMBsToASGDPushAndPull; // decides after how many minibatches ASGD should do a push & pull to the parameter server.
intargvector m_numMBsToASGDPushAndPull; // decides after how many minibatches ASGD should do a push & pull to the parameter server.
// note that this will override m_nFramesBetweenASGDSync when set.
size_t m_nFramesBetweenASGDSync;
intargvector m_nFramesBetweenASGDSync;
bool m_isPipeline;
size_t m_nEpochBarrier;
intargvector m_nEpochBarrier;
AdjustLearningRateatBeginning m_adjustlearningrateatbeginning;
double m_adjustcoefficient;
size_t m_adjustnbminibatch;