From da13c84d9dd73525aa4a99dbc5af0599b9d21da9 Mon Sep 17 00:00:00 2001
From: Qiwei Ye
Date: Tue, 16 Feb 2016 11:47:37 +0800
Subject: [PATCH] debug check. WARNING: should be reverted later

---
 Source/Common/Include/MultiversoWrapper.h | 37 +++++++++++++++++------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/Source/Common/Include/MultiversoWrapper.h b/Source/Common/Include/MultiversoWrapper.h
index fe99c3d61..ffd1d463b 100644
--- a/Source/Common/Include/MultiversoWrapper.h
+++ b/Source/Common/Include/MultiversoWrapper.h
@@ -1,7 +1,5 @@
 #pragma once
 
-// This uses Multiverso.h which requires
-// the header files in ..\Multiverso\include
 #include 
 #include 
 #pragma comment(lib, "IMultiverso.lib")
@@ -50,7 +48,7 @@ namespace Microsoft {
         typedef shared_ptr<ComputationNode<ElemType>> ComputationNodePtr;
     public:
         MultiversoWrapper(const std::list<ComputationNodeBasePtr> & learnableNodes,
-            int localWorkerNumber,
+            int MPINodeNum,
             bool isPipeline = true,
             AdjustLearningRateatBeginning adjusttype = AdjustLearningRateatBeginning::None,
             double adjustcoef = 0.2,
@@ -63,7 +61,7 @@ namespace Microsoft {
 
             //m_multiversoAdaptor = false;
 
-            m_totalClientNumber = localWorkerNumber;
+            m_totalClientNumber = MPINodeNum;
 
             //Pipeline releated variables
             m_isPipelined = isPipeline;
@@ -89,7 +87,7 @@ namespace Microsoft {
             m_modelSizeOfEachServer = new size_t[m_totalClientNumber];
             m_indexOfEachServer = new size_t[m_totalClientNumber];
 
-            MultiversoInit(learnableNodes, 1);
+            MultiversoInit(learnableNodes);
         }
 
         ~MultiversoWrapper()
@@ -122,6 +120,7 @@ namespace Microsoft {
         // This function will upload parameters into Multiverso
         void InitModel(const std::list<ComputationNodeBasePtr> & learnableNodes)
         {
+            printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
             float factor = (float) 1.0 / m_totalClientNumber;
 
             //weights
@@ -138,6 +137,7 @@ namespace Microsoft {
                 ElemType* px = m_cpuAsyncBuffer[0] + m_tableIndex[i];
                 mat.CopyToArray(px, m_tableLength[i]);
             }
+            printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
 
             for (int i = 1; i < m_localCacheNumber; i++)
                 memcpy(m_cpuAsyncBuffer[i], m_cpuAsyncBuffer[0], sizeof(ElemType) * m_totalModelSize);
@@ -146,7 +146,9 @@ namespace Microsoft {
             std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray,
                 std::bind1st(std::multiplies<ElemType>(), factor));
+            printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
             m_sharedArray->Add(m_deltaArray, m_totalModelSize);
+            printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
             m_sharedArray->Get();
             //for (int row = 0; row < m_totalClientNumber; ++row)
             //    m_multiversoAdaptor->Add(table_id, row, m_deltaArray + m_indexOfEachServer[row], factor);
@@ -154,6 +156,7 @@ namespace Microsoft {
             //m_multiversoAdaptor->BatchLoad(table_id, m_deltaArray, m_indexOfEachServer, m_modelSizeOfEachServer);
 
             memcpy(m_deltaArray, m_sharedArray->raw().data(), sizeof(ElemType) * m_totalModelSize);
+            printf("DEBUG POINT: %s, %d \n", __FILE__, __LINE__);
         }
 
         //Todo: support auto adjust learning rate
@@ -162,6 +165,7 @@ namespace Microsoft {
         //ASGD logic
         void PushAndPullModel(const std::list<ComputationNodeBasePtr> & learnableNodes)
         {
+            printf("pushAndPullModel\n");
             //Note: maybe overflow.
             m_modelSyncCount++;
 
@@ -169,6 +173,7 @@ namespace Microsoft {
             //if (m_isPipelined && m_prefetchThread->joinable())
             //    m_prefetchThread->join();
             WaitAsyncBuffer();
+            printf("waitAsyncBuffer()\n");
 
             m_cacheIndex = m_cacheSwapIndex[m_cacheIndex];
 
@@ -233,8 +238,11 @@ namespace Microsoft {
                     // lr decay
                     std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray,
                         std::bind1st(std::multiplies<ElemType>(), factor));
+                    printf("pre added.\n");
                     m_sharedArray->Add(m_deltaArray, m_totalModelSize);
+                    printf("added.\n");
                     m_sharedArray->Get();
+                    printf("got.\n");
                     memcpy(m_cpuAsyncBuffer[t_cacheIdx], m_sharedArray->raw().data(), m_totalModelSize);
                     //////Communication
                     //for (int row = 0; row < m_totalClientNumber; row++)
@@ -287,9 +295,11 @@ namespace Microsoft {
                 // lr decay
                 std::transform(m_deltaArray, m_deltaArray + m_totalModelSize, m_deltaArray,
                     std::bind1st(std::multiplies<ElemType>(), factor));
-
+                printf("pre added.\n");
                 m_sharedArray->Add(m_deltaArray, m_totalModelSize);
+                printf("added.\n");
                 m_sharedArray->Get();
+                printf("got.\n");
                 memcpy(m_cpuAsyncBuffer[0], m_sharedArray->raw().data(), m_totalModelSize);
 
                 //for (int row = 0; row < m_totalClientNumber; row++)
@@ -332,7 +342,7 @@ namespace Microsoft {
                 m_prefetchThread->join();
         }
     private:
-        void MultiversoInit(const std::list<ComputationNodeBasePtr> & learnableNodes, int localWorkerNumber)
+        void MultiversoInit(const std::list<ComputationNodeBasePtr> & learnableNodes)
         {
             assert(!m_isInitialized);
             m_isInitialized = true;
@@ -360,9 +370,15 @@ namespace Microsoft {
                 m_modelSizeOfEachServer[i] = i < m_totalModelSize % m_totalClientNumber ? m_totalModelSize / m_totalClientNumber + 1 : m_totalModelSize / m_totalClientNumber;
                 idx += m_modelSizeOfEachServer[i];
             }
+
+            printf("create worker table \n");
             m_sharedArray = new multiverso::ArrayWorker<ElemType>(m_totalModelSize);
+            printf("create worker table succeeded %zu\n", m_sharedArray->raw().size());
 
+            printf("create server table \n");
             m_serverArray = new multiverso::ArrayServer<ElemType>(m_totalModelSize);
+            printf("create server table succeeded %zu\n", m_sharedArray->raw().size());
+            g_mpi->WaitAll();
             multiverso::MultiversoBarrier();
             //multiverso::SetTable(table_id, m_totalClientNumber, ((size_t)(m_totalModelSize / m_totalClientNumber)) + 1, sizeof(ElemType) == 4 ? "float" : "double");
             idx = 0;
@@ -375,16 +391,16 @@ namespace Microsoft {
 #ifndef CPUONLY
             //pinned memory
             for (int i = 0; i < m_localCacheNumber; ++i)
-                CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
+                CudaErrorCheck(cudaMallocHost((void **)&m_cpuAsyncBuffer[i], sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
 
-            CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize + 1), cudaHostAllocPortable));
+            CudaErrorCheck(cudaMallocHost((void **)&m_deltaArray, sizeof(ElemType) * (m_totalModelSize), cudaHostAllocPortable));
 
             //GPU memory cache
             for (int i = 0; i < m_localCacheNumber; i++)
                 m_gpuAsyncBuffer[i] = new Matrix<ElemType>*[m_tableCount];
 #else
             for (int i = 0; i < m_localCacheNumber; i++)
-                m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize + 1];
+                m_cpuAsyncBuffer[i] = new ElemType[m_totalModelSize];
 #endif
 
             //multiverso::Init(localWorkerNumber);
@@ -398,6 +414,7 @@ namespace Microsoft {
             ////m_multiversoAdaptor = new multiverso::Adaptor(adaptor_id, 0);
             //printf("%s@rank %d/%d: Initialized Adaptor.\n",
             //    getenv("COMPUTERNAME"), multiverso::GetMPIRank(), multiverso::GetMPISize());
+            multiverso::Log::ResetLogLevel(multiverso::LogLevel::Debug);
             fflush(stdout);
         }
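
Since the subject line flags these debug checks as temporary, one way to keep the later revert cheap is to route every temporary trace through a single compile-time switch instead of raw printf calls at each site. A minimal sketch of that idea; the CNTK_ASGD_TRACE define and DEBUG_POINT macro below are hypothetical and not part of this patch:

// Hypothetical helper, not part of this patch: all temporary ASGD traces go
// through one macro, so removing the debug output later is a matter of
// dropping one define rather than editing every call site again.
#include <cstdio>

#ifdef CNTK_ASGD_TRACE
#define DEBUG_POINT(msg)                                                     \
    do                                                                       \
    {                                                                        \
        std::printf("DEBUG POINT: %s, %d: %s\n", __FILE__, __LINE__, (msg)); \
        std::fflush(stdout);                                                 \
    } while (0)
#else
#define DEBUG_POINT(msg) ((void)0)
#endif

// Example call sites mirroring the ones added in this patch:
//     DEBUG_POINT("pushAndPullModel");
//     m_sharedArray->Add(m_deltaArray, m_totalModelSize);
//     DEBUG_POINT("added");

Building without CNTK_ASGD_TRACE compiles the traces away entirely, so the instrumented code can stay in the branch until the debugging is finished.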