From 146ef4878f4822128c3d711da648f80ee19d0111 Mon Sep 17 00:00:00 2001 From: Qiwei Ye Date: Wed, 13 Apr 2016 16:18:50 +0800 Subject: [PATCH] renaming the smooth updater to momentum updater; --- next/IMultiverso.vcxproj | 2 +- next/IMultiverso.vcxproj.filters | 6 +- next/Test/main.cpp | 23 +++-- .../multiverso/updater/adagrad_updater.h | 34 +++++-- ..._gradient_updater.h => momentum_updater.h} | 12 +-- .../updater/second_order_gradient_updater.h | 96 ++++++++++--------- next/include/multiverso/updater/updater.h | 20 +++- next/src/table/matrix_table.cpp | 8 +- next/src/updater/updater.cpp | 7 +- 9 files changed, 126 insertions(+), 82 deletions(-) rename next/include/multiverso/updater/{smooth_gradient_updater.h => momentum_updater.h} (67%) diff --git a/next/IMultiverso.vcxproj b/next/IMultiverso.vcxproj index f9345e6..50beb2d 100644 --- a/next/IMultiverso.vcxproj +++ b/next/IMultiverso.vcxproj @@ -192,7 +192,7 @@ - + diff --git a/next/IMultiverso.vcxproj.filters b/next/IMultiverso.vcxproj.filters index 9a78ae8..206782b 100644 --- a/next/IMultiverso.vcxproj.filters +++ b/next/IMultiverso.vcxproj.filters @@ -100,9 +100,6 @@ updater - - updater - updater @@ -115,6 +112,9 @@ updater + + updater + diff --git a/next/Test/main.cpp b/next/Test/main.cpp index 90b00bb..2f2aafa 100644 --- a/next/Test/main.cpp +++ b/next/Test/main.cpp @@ -102,6 +102,7 @@ void TestArray(int argc, char* argv[]) { UpdateOption option; option.set_learning_rate(1 - 0.0001 * i); option.set_momentum(0.99); + option.set_rho(0.01f); shared_array->Add(delta.data(), 1000000, &option); @@ -343,8 +344,8 @@ void TestMatrix(int argc, char* argv[]){ // Log::Debug("rank %d has no worker\n", MV_Rank()); // } - MatrixWorkerTable* worker_table = new MatrixWorkerTable(num_row, num_col); - MatrixServerTable* server_table = new MatrixServerTable(num_row, num_col); + MatrixWorkerTable* worker_table = new MatrixWorkerTable(num_row, num_col); + MatrixServerTable* server_table = new MatrixServerTable(num_row, num_col); std::thread* m_prefetchThread = nullptr; MV_Barrier(); @@ -359,21 +360,22 @@ void TestMatrix(int argc, char* argv[]){ std::vector v = { 0, 1, 5, 10 }; // test data - std::vector delta(size); + std::vector delta(size); for (int i = 0; i < size; ++i) delta[i] = i; - int * data = new int[size]; + float * data = new float[size]; m_prefetchThread = new std::thread([&](){ - worker_table->Add(delta.data(), size); //add all + UpdateOption option; + worker_table->Add(delta.data(), size, &option); //add all worker_table->Get(data, size); //get all printf("----------------------------\n"); for (int i = 0; i < num_row; ++i){ printf("rank %d, row %d: ", MV_Rank(), i); for (int j = 0; j < num_col; ++j) - printf("%d ", data[i * num_col + j]); + printf("%.2f ", data[i * num_col + j]); printf("\n"); }; }); @@ -386,9 +388,10 @@ void TestMatrix(int argc, char* argv[]){ m_prefetchThread = nullptr; } //test data_vec - std::vector data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] }; - std::vector delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] }; - worker_table->Add(v, delta_rows, num_col); + std::vector data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] }; + std::vector delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] }; + UpdateOption option; + worker_table->Add(v, delta_rows, num_col, &option); worker_table->Get(v, data_rows, num_col); MV_Barrier(); @@ -396,7 +399,7 @@ void TestMatrix(int argc, char* argv[]){ for (int i = 0; i < num_row; ++i){ printf("rank %d, row %d: ", MV_Rank(), i); for (int j = 0; j < num_col; ++j) - printf("%d ", data[i * num_col + j]); + printf("%.2f ", data[i * num_col + j]); printf("\n"); } MV_Barrier(); diff --git a/next/include/multiverso/updater/adagrad_updater.h b/next/include/multiverso/updater/adagrad_updater.h index 3ae5029..77ed15d 100644 --- a/next/include/multiverso/updater/adagrad_updater.h +++ b/next/include/multiverso/updater/adagrad_updater.h @@ -5,6 +5,8 @@ #include #include +#include + namespace multiverso { @@ -13,25 +15,41 @@ class AdaGradUpdater : public Updater { public: explicit AdaGradUpdater(size_t size): e(1e-6f), size_(size) { - historic_g_sqr_.resize(MV_NumWorkers()); - for (auto s : historic_g_sqr_){ - s.resize(size_); - } - Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. \n", size_, e); + historic_g_sqr_.resize(MV_NumWorkers(), std::vector(size_)); + Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. historic_size = %d\n", size_, e, historic_g_sqr_.size()); } + void Update(size_t num_element, T* data, T* delta, UpdateOption* option, size_t offset) override { + auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id()); for (size_t index = 0; index < num_element; ++index) { - - historic_g_sqr_[option->worker_id()][index + offset] -= + g_sqr_data_[index + offset] -= delta[index] * delta[index] / option->learning_rate() / option->learning_rate(); + //[TODO(qiwye)] sqrt take too much time data[index + offset] -= option->rho() / - std::sqrt(historic_g_sqr_[option->worker_id()][index + offset] + e) * + std::sqrt(g_sqr_data_[index + offset] + e) * delta[index] / option->learning_rate(); + + //data[index + offset] -= option->rho() * + // QuakeRsqrt(g_sqr_data_[index + offset] + e) * + // delta[index] / option->learning_rate(); } } + + +private: + + float QuakeRsqrt(float number){ + float x = number * 0.5f, y = number; + std::uint32_t i; + std::memcpy(&i, &y, sizeof(float)); + i = 0x5f3759df - (i >> 1); + std::memcpy(&y, &i, sizeof(float)); + return y * (1.5f - (x * y * y)); + } + protected: std::vector< std::vector> historic_g_sqr_; float e; diff --git a/next/include/multiverso/updater/smooth_gradient_updater.h b/next/include/multiverso/updater/momentum_updater.h similarity index 67% rename from next/include/multiverso/updater/smooth_gradient_updater.h rename to next/include/multiverso/updater/momentum_updater.h index 71b2e42..68470c4 100644 --- a/next/include/multiverso/updater/smooth_gradient_updater.h +++ b/next/include/multiverso/updater/momentum_updater.h @@ -1,5 +1,5 @@ -#ifndef MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_ -#define MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_ +#ifndef MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ +#define MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ #include "updater.h" #include @@ -7,9 +7,9 @@ namespace multiverso { template -class SmoothGradientUpdater : public Updater { +class MomentumUpdater : public Updater { public: - explicit SmoothGradientUpdater(size_t size) : size_(size) { + explicit MomentumUpdater(size_t size) : size_(size) { Log::Debug("[SmoothGradientUpdater] Init with size = %d. \n", size_); smooth_gradient_.resize(size_); } @@ -22,7 +22,7 @@ public: data[index + offset] -= smooth_gradient_[index + offset]; } } - ~SmoothGradientUpdater() { smooth_gradient_.clear(); } + ~MomentumUpdater() { smooth_gradient_.clear(); } protected: std::vector smooth_gradient_; size_t size_; @@ -30,4 +30,4 @@ protected: } -#endif // MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_ \ No newline at end of file +#endif // MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_ \ No newline at end of file diff --git a/next/include/multiverso/updater/second_order_gradient_updater.h b/next/include/multiverso/updater/second_order_gradient_updater.h index 0b1786e..31b373c 100644 --- a/next/include/multiverso/updater/second_order_gradient_updater.h +++ b/next/include/multiverso/updater/second_order_gradient_updater.h @@ -1,58 +1,64 @@ #ifndef MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_ #define MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_ +#include +#include + #include "updater.h" -#include namespace multiverso { - // [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms - template - class SecondOrderUpdater : public Updater { - public: - explicit SecondOrderUpdater(size_t size) : - size_(size) { - Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_); - shadow_copies_.resize(MV_NumWorkers()); - for (auto s : shadow_copies_){ - s.resize(size); - } - historic_g_sqr_.resize(MV_NumWorkers()); - for (auto s : historic_g_sqr_){ - s.resize(size); - } +// [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms +template +class SecondOrderUpdater : public Updater { +public: + explicit SecondOrderUpdater(size_t size) : + size_(size) { + Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_); + shadow_copies_.resize(MV_NumWorkers(), std::vector(size_)); + historic_g_sqr_.resize(MV_NumWorkers(), std::vector(size_)); + } + void Update(size_t num_element, T*data, T*delta, + UpdateOption* option, size_t offset) override { + auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id()); + auto copies_data_ = shadow_copies_.at(option->worker_id()); + for (size_t index = 0; index < num_element; ++index) { + // gsqr = (1 - r) * g^2 + r * gsqr + g_sqr_data_[index + offset] = + (1 - option->rho()) * delta[index] * delta[index] + / option->learning_rate() / option->learning_rate() + + option->rho() * g_sqr_data_[index + offset]; + + data[index + offset] -= delta[index] + option->lambda() * + std::sqrt(g_sqr_data_[index + offset]) * + (data[index + offset] - copies_data_[index + offset]); + + // caching each worker's latest version of parameter + copies_data_[index + offset] = data[index + offset]; } - void Update(size_t num_element, T*data, T*delta, - UpdateOption* option, size_t offset) override { - - for (size_t index = 0; index < num_element; ++index) { - // gsqr = (1 - r) * g^2 + r * gsqr - historic_g_sqr_[option->worker_id()][index + offset] = - (1 - option->rho()) * delta[index] * delta[index] - / option->learning_rate() / option->learning_rate() + - option->rho() * historic_g_sqr_[option->worker_id()][index + offset]; + } + ~SecondOrderUpdater() { + shadow_copies_.clear(); + historic_g_sqr_.clear(); + } - data[index + offset] -= delta[index] + option->lambda() * - std::sqrt(historic_g_sqr_[option->worker_id()][index + offset]) * - (data[index + offset] - shadow_copies_[option->worker_id()][index + offset]); +private: + float QuakeRsqrt(float number) { + float x = number * 0.5f, y = number; + std::uint32_t i; + std::memcpy(&i, &y, sizeof(float)); + i = 0x5f3759df - (i >> 1); + std::memcpy(&y, &i, sizeof(float)); + return y * (1.5f - (x * y * y)); + } - // caching each worker's latest version of parameter - shadow_copies_[option->worker_id()][index + offset] = data[index + offset]; - } - } - ~SecondOrderUpdater() { - shadow_copies_.clear(); - historic_g_sqr_.clear(); - } - protected: - std::vector< std::vector> shadow_copies_; - std::vector< std::vector> historic_g_sqr_; +protected: + std::vector< std::vector> shadow_copies_; + std::vector< std::vector> historic_g_sqr_; - // move these parameter to UpdateOption - size_t size_; - }; + size_t size_; +}; +} // namespace multiverso -} - -#endif // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_ \ No newline at end of file +#endif // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_ diff --git a/next/include/multiverso/updater/updater.h b/next/include/multiverso/updater/updater.h index 93eea3a..9ec54a9 100644 --- a/next/include/multiverso/updater/updater.h +++ b/next/include/multiverso/updater/updater.h @@ -2,14 +2,21 @@ #define MULTIVERSO_UPDATER_UPDATER_H_ #include +#include #include namespace multiverso { struct UpdateOption { public: - // TODO(feiga): default value; - UpdateOption() { data_[0].i = MV_WorkerId(); } + // TODO(qiwye): make these default value more flexiable + UpdateOption(){ + data_[0].i = MV_WorkerId(); + data_[1].f = 0.0f; + data_[2].f = 0.01f; + data_[3].f = 0.1f; + data_[4].f = 0.1f; + } UpdateOption(const char* data, size_t size) { CopyFrom(data, size); @@ -29,6 +36,15 @@ public: void set_lambda(float lambda) { data_[4].f = lambda; } + std::string toString(){ + std::stringstream ss; + ss << "UpdateOption " << worker_id() << " " << momentum() << " " + << learning_rate() << " " << rho() << " " << lambda() << std::endl; + + return ss.str(); + } + + const char* data() const { return reinterpret_cast(&data_[0]); } size_t size() const { return kSize * sizeof(InternalType); } void CopyFrom(const char* data, size_t size) { diff --git a/next/src/table/matrix_table.cpp b/next/src/table/matrix_table.cpp index 6941bea..e35a79e 100644 --- a/next/src/table/matrix_table.cpp +++ b/next/src/table/matrix_table.cpp @@ -138,7 +138,7 @@ int MatrixWorkerTable::Partition(const std::vector& kv, int rank = MV_ServerIdToRank(it.first); std::vector& vec = (*out)[rank]; vec.push_back(Blob(it.second * sizeof(int))); - if (kv.size() == 2) vec.push_back(Blob(it.second * row_size_)); + if (kv.size() >= 2) vec.push_back(Blob(it.second * row_size_)); } count.clear(); @@ -147,7 +147,7 @@ int MatrixWorkerTable::Partition(const std::vector& kv, int dst = dest[i]; int rank = MV_ServerIdToRank(dst); (*out)[rank][0].As(count[dst]) = keys[i]; - if (kv.size() == 2){ // copy add values + if (kv.size() >= 2){ // copy add values memcpy(&((*out)[rank][1].As(count[dst] * num_col_)), kv[1].data() + offset, row_size_); offset += row_size_; @@ -244,7 +244,9 @@ void MatrixServerTable::ProcessAdd(const std::vector& data) { server_id_, row_offset_, ssize / num_col_); } else { - CHECK(data[1].size() == keys_size * sizeof(T)* num_col_); + Log::Debug("[Debug] Server = %d, keys_size = %d, data[1].size = %d, num_col = %d\n", + server_id_, keys_size, data[1].size() , num_col_); + CHECK(data[1].size() == keys_size * num_col_); int offset_v = 0; CHECK(storage_.size() >= keys_size * num_col_); diff --git a/next/src/updater/updater.cpp b/next/src/updater/updater.cpp index 2665389..c9d3a00 100644 --- a/next/src/updater/updater.cpp +++ b/next/src/updater/updater.cpp @@ -1,7 +1,7 @@ #include "multiverso/updater/updater.h" #include "multiverso/updater/adagrad_updater.h" -#include "multiverso/updater/smooth_gradient_updater.h" +#include "multiverso/updater/momentum_updater.h" #include "multiverso/updater/second_order_gradient_updater.h" #include "multiverso/updater/sgd_updater.h" #include "multiverso/util/configure.h" @@ -32,11 +32,10 @@ Updater* Updater::GetUpdater(size_t) { template Updater* Updater::GetUpdater(size_t size) { std::string type = MV_CONFIG_updater_type; - printf(type.c_str()); if (type == "sgd") return new SGDUpdater(size); if (type == "adagrad") return new AdaGradUpdater(size); - if (type == "smooth_gradient") return new SmoothGradientUpdater(size); - if (type == "second_order") return new SecondOrderUpdater(size); + if (type == "momentum_sgd") return new MomentumUpdater(size); + if (type == "second_order_sgd") return new SecondOrderUpdater(size); // Default: simple updater return new Updater(); }