debug check,. WARNING: should be revert later
This commit is contained in:
Родитель
83a60750d7
Коммит
da13c84d9d
|
@ -1,7 +1,5 @@
|
|||
#pragma once
|
||||
|
||||
// This uses Multiverso.h which requires
|
||||
// the header files in ..\Multiverso\include
|
||||
#include <multiverso/multiverso.h>
|
||||
#include <multiverso/table/array_table.h>
|
||||
#pragma comment(lib, "IMultiverso.lib")
|
||||
|
@ -50,7 +48,7 @@ namespace Microsoft {
|
|||
typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
|
||||
public:
|
||||
MultiversoWrapper(const std::list<ComputationNodeBasePtr> & learnableNodes,
|
||||
int localWorkerNumber,
|
||||
int MPINodeNum,
|
||||
bool isPipeline = true,
|
||||
AdjustLearningRateatBeginning adjusttype = AdjustLearningRateatBeginning::None,
|
||||
double adjustcoef = 0.2,
|
||||
|
@ -63,7 +61,7 @@ namespace Microsoft {
|
|||
|
||||
//m_multiversoAdaptor = false;
|
||||
|
||||
m_totalClientNumber = localWorkerNumber;
|
||||
m_totalClientNumber = MPINodeNum;
|
||||
|
||||
//Pipeline releated variables
|
||||
m_isPipelined = isPipeline;
|
||||
|
@ -89,7 +87,7 @@ namespace Microsoft {
|
|||
|
||||
m_modelSizeOfEachServer = new size_t[m_totalClientNumber];
|
||||
m_indexOfEachServer = new size_t[m_totalClientNumber];
|
||||
MultiversoInit(learnableNodes, 1);
|
||||
MultiversoInit(learnableNodes);
|
||||
}
|
||||
|
||||
~MultiversoWrapper()
|
||||
|
@ -122,6 +120,7 @@ namespace Microsoft {
|
|||
// This function will upload parameters into Multiverso
|
||||
void InitModel(const std::list<ComputationNodeBasePtr> & learnableNodes)
|
||||
{
|
||||
printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
|
||||
float factor = (float) 1.0 / m_totalClientNumber;
|
||||
|
||||
//weights
|
||||
|
@ -138,6 +137,7 @@ namespace Microsoft {
|
|||
ElemType* px = m_cpuAsyncBuffer[0] + m_tableIndex[i];
|
||||
mat.CopyToArray(px, m_tableLength[i]);
|
||||
}
|
||||
printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
|
||||
|
||||
for (int i = 1; i < m_localCacheNumber; i++)
|
||||
memcpy(m_cpuAsyncBuffer[i], m_cpuAsyncBuffer[0], sizeof(ElemType) * m_totalModelSize);
|
||||
|
@ -146,7 +146,9 @@ namespace Microsoft {
|
|||
|
||||
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray, std::bind1st(std::multiplies<ElemType>(), factor));
|
||||
|
||||
printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
|
||||
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
|
||||
printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
|
||||
m_sharedArray->Get();
|
||||
//for (int row = 0; row < m_totalClientNumber; ++row)
|
||||
// m_multiversoAdaptor->Add(table_id, row, m_deltaArray + m_indexOfEachServer[row], factor);
|
||||
|
@ -154,6 +156,7 @@ namespace Microsoft {
|
|||
//m_multiversoAdaptor->BatchLoad(table_id, m_deltaArray, m_indexOfEachServer, m_modelSizeOfEachServer);
|
||||
|
||||
memcpy(m_deltaArray, m_sharedArray->raw().data(), sizeof(ElemType) * m_totalModelSize);
|
||||
printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
//Todo: support auto adjust learning rate
|
||||
|
@ -162,6 +165,7 @@ namespace Microsoft {
|
|||
//ASGD logic
|
||||
void PushAndPullModel(const std::list<ComputationNodeBasePtr> & learnableNodes)
|
||||
{
|
||||
printf("pushAndPullModel\n");
|
||||
//Note: maybe overflow.
|
||||
m_modelSyncCount++;
|
||||
|
||||
|
@ -169,6 +173,7 @@ namespace Microsoft {
|
|||
//if (m_isPipelined && m_prefetchThread->joinable())
|
||||
// m_prefetchThread->join();
|
||||
WaitAsyncBuffer();
|
||||
printf("waitAsyncBuffer()");
|
||||
|
||||
m_cacheIndex = m_cacheSwapIndex[m_cacheIndex];
|
||||
|
||||
|
@ -233,8 +238,11 @@ namespace Microsoft {
|
|||
// lr decay
|
||||
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray, std::bind1st(std::multiplies<ElemType>(), factor));
|
||||
|
||||
printf("pre added.\n");
|
||||
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
|
||||
printf("added.\n");
|
||||
m_sharedArray->Get();
|
||||
printf("geted.\n");
|
||||
memcpy(m_cpuAsyncBuffer[t_cacheIdx], m_sharedArray->raw().data(), m_totalModelSize);
|
||||
//////Communication
|
||||
//for (int row = 0; row < m_totalClientNumber; row++)
|
||||
|
@ -287,9 +295,11 @@ namespace Microsoft {
|
|||
|
||||
// lr decay
|
||||
std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray, std::bind1st(std::multiplies<ElemType>(), factor));
|
||||
|
||||
printf("pre added.\n");
|
||||
m_sharedArray->Add(m_deltaArray, m_totalModelSize);
|
||||
printf("added.\n");
|
||||
m_sharedArray->Get();
|
||||
printf("gotted.\n");
|
||||
memcpy(m_cpuAsyncBuffer[0], m_sharedArray->raw().data(), m_totalModelSize);
|
||||
|
||||
//for (int row = 0; row < m_totalClientNumber; row++)
|
||||
|
@ -332,7 +342,7 @@ namespace Microsoft {
|
|||
m_prefetchThread->join();
|
||||
}
|
||||
private:
|
||||
void MultiversoInit(const std::list<ComputationNodeBasePtr> & learnableNodes, int localWorkerNumber)
|
||||
void MultiversoInit(const std::list<ComputationNodeBasePtr> & learnableNodes)
|
||||
{
|
||||
assert(!m_isInitialized);
|
||||
m_isInitialized = true;
|
||||
|
@ -360,9 +370,15 @@ namespace Microsoft {
|
|||
m_modelSizeOfEachServer[i] = i < m_totalModelSize % m_totalClientNumber ? m_totalModelSize / m_totalClientNumber + 1 : m_totalModelSize / m_totalClientNumber;
|
||||
idx += m_modelSizeOfEachServer[i];
|
||||
}
|
||||
|
||||
printf("create worker table \n");
|
||||
m_sharedArray = new multiverso::ArrayWorker<ElemType>(m_totalModelSize);
|
||||
printf("create worker table successed %d\n", m_sharedArray->raw().size());
|
||||
printf("create server table \n");
|
||||
m_serverArray = new multiverso::ArrayServer<ElemType>(m_totalModelSize);
|
||||
printf("create server table successed %d\n", m_sharedArray->raw().size());
|
||||
|
||||
g_mpi->WaitAll();
|
||||
multiverso::MultiversoBarrier();
|
||||
//multiverso::SetTable(table_id, m_totalClientNumber, ((size_t)(m_totalModelSize / m_totalClientNumber)) + 1, sizeof(ElemType) == 4 ? "float" : "double");
|
||||
idx = 0;
|
||||
|
@ -375,16 +391,16 @@ namespace Microsoft {
|
|||
#ifndef CPUONLY
|
||||
//pinned memory
|
||||
for (int i = 0; i < m_localCacheNumber; ++i)
|
||||
CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
|
||||
CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
|
||||
|
||||
CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
|
||||
CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
|
||||
|
||||
//GPU memory cache
|
||||
for (int i = 0; i < m_localCacheNumber; i++)
|
||||
m_gpuAsyncBuffer[i] = new Matrix<ElemType>*[m_tableCount];
|
||||
#else
|
||||
for (int i = 0; i < m_localCacheNumber; i++)
|
||||
m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize + 1];
|
||||
m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize];
|
||||
#endif
|
||||
|
||||
//multiverso::Init(localWorkerNumber);
|
||||
|
@ -398,6 +414,7 @@ namespace Microsoft {
|
|||
////m_multiversoAdaptor = new multiverso::Adaptor(adaptor_id, 0);
|
||||
//printf("%s@rank %d/%d: Initialized Adaptor.\n",
|
||||
// getenv("COMPUTERNAME"), multiverso::GetMPIRank(), multiverso::GetMPISize());
|
||||
multiverso::Log::ResetLogLevel(multiverso::LogLevel::Debug);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
|
|
Загрузка…
Ссылка в новой задаче