renaming the smooth updater to momentum updater;

This commit is contained in:
Qiwei Ye 2016-04-13 16:18:50 +08:00
Родитель 29359a5413
Коммит 146ef4878f
9 изменённых файлов: 126 добавлений и 82 удалений

Просмотреть файл

@ -192,7 +192,7 @@
<ClInclude Include="include\multiverso\updater\adagrad_updater.h" />
<ClInclude Include="include\multiverso\updater\sgd_updater.h" />
<ClInclude Include="include\multiverso\updater\second_order_gradient_updater.h" />
<ClInclude Include="include\multiverso\updater\smooth_gradient_updater.h" />
<ClInclude Include="include\multiverso\updater\momentum_updater.h" />
<ClInclude Include="include\multiverso\updater\updater.h" />
<ClInclude Include="include\multiverso\util\configure.h" />
<ClInclude Include="include\multiverso\util\async_buffer.h" />

Просмотреть файл

@ -100,9 +100,6 @@
<ClInclude Include="include\multiverso\updater\updater.h">
<Filter>updater</Filter>
</ClInclude>
<ClInclude Include="include\multiverso\updater\smooth_gradient_updater.h">
<Filter>updater</Filter>
</ClInclude>
<ClInclude Include="include\multiverso\updater\adagrad_updater.h">
<Filter>updater</Filter>
</ClInclude>
@ -115,6 +112,9 @@
<ClInclude Include="include\multiverso\updater\sgd_updater.h">
<Filter>updater</Filter>
</ClInclude>
<ClInclude Include="include\multiverso\updater\momentum_updater.h">
<Filter>updater</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="include">

Просмотреть файл

@ -102,6 +102,7 @@ void TestArray(int argc, char* argv[]) {
UpdateOption option;
option.set_learning_rate(1 - 0.0001 * i);
option.set_momentum(0.99);
option.set_rho(0.01f);
shared_array->Add(delta.data(), 1000000, &option);
@ -343,8 +344,8 @@ void TestMatrix(int argc, char* argv[]){
// Log::Debug("rank %d has no worker\n", MV_Rank());
// }
MatrixWorkerTable<int>* worker_table = new MatrixWorkerTable<int>(num_row, num_col);
MatrixServerTable<int>* server_table = new MatrixServerTable<int>(num_row, num_col);
MatrixWorkerTable<float>* worker_table = new MatrixWorkerTable<float>(num_row, num_col);
MatrixServerTable<float>* server_table = new MatrixServerTable<float>(num_row, num_col);
std::thread* m_prefetchThread = nullptr;
MV_Barrier();
@ -359,21 +360,22 @@ void TestMatrix(int argc, char* argv[]){
std::vector<int> v = { 0, 1, 5, 10 };
// test data
std::vector<int> delta(size);
std::vector<float> delta(size);
for (int i = 0; i < size; ++i)
delta[i] = i;
int * data = new int[size];
float * data = new float[size];
m_prefetchThread = new std::thread([&](){
worker_table->Add(delta.data(), size); //add all
UpdateOption option;
worker_table->Add(delta.data(), size, &option); //add all
worker_table->Get(data, size); //get all
printf("----------------------------\n");
for (int i = 0; i < num_row; ++i){
printf("rank %d, row %d: ", MV_Rank(), i);
for (int j = 0; j < num_col; ++j)
printf("%d ", data[i * num_col + j]);
printf("%.2f ", data[i * num_col + j]);
printf("\n");
};
});
@ -386,9 +388,10 @@ void TestMatrix(int argc, char* argv[]){
m_prefetchThread = nullptr;
}
//test data_vec
std::vector<int*> data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] };
std::vector<int*> delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] };
worker_table->Add(v, delta_rows, num_col);
std::vector<float*> data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] };
std::vector<float*> delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] };
UpdateOption option;
worker_table->Add(v, delta_rows, num_col, &option);
worker_table->Get(v, data_rows, num_col);
MV_Barrier();
@ -396,7 +399,7 @@ void TestMatrix(int argc, char* argv[]){
for (int i = 0; i < num_row; ++i){
printf("rank %d, row %d: ", MV_Rank(), i);
for (int j = 0; j < num_col; ++j)
printf("%d ", data[i * num_col + j]);
printf("%.2f ", data[i * num_col + j]);
printf("\n");
}
MV_Barrier();

Просмотреть файл

@ -5,6 +5,8 @@
#include <vector>
#include <cmath>
#include <cstdint>
namespace multiverso {
@ -13,25 +15,41 @@ class AdaGradUpdater : public Updater<T> {
public:
explicit AdaGradUpdater(size_t size):
e(1e-6f), size_(size) {
historic_g_sqr_.resize(MV_NumWorkers());
for (auto s : historic_g_sqr_){
s.resize(size_);
}
Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. \n", size_, e);
historic_g_sqr_.resize(MV_NumWorkers(), std::vector<T>(size_));
Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. historic_size = %d\n", size_, e, historic_g_sqr_.size());
}
void Update(size_t num_element, T* data, T* delta,
UpdateOption* option, size_t offset) override {
auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id());
for (size_t index = 0; index < num_element; ++index) {
historic_g_sqr_[option->worker_id()][index + offset] -=
g_sqr_data_[index + offset] -=
delta[index] * delta[index] / option->learning_rate() /
option->learning_rate();
//[TODO(qiwye)] sqrt take too much time
data[index + offset] -= option->rho() /
std::sqrt(historic_g_sqr_[option->worker_id()][index + offset] + e) *
std::sqrt(g_sqr_data_[index + offset] + e) *
delta[index] / option->learning_rate();
//data[index + offset] -= option->rho() *
// QuakeRsqrt(g_sqr_data_[index + offset] + e) *
// delta[index] / option->learning_rate();
}
}
private:
float QuakeRsqrt(float number){
float x = number * 0.5f, y = number;
std::uint32_t i;
std::memcpy(&i, &y, sizeof(float));
i = 0x5f3759df - (i >> 1);
std::memcpy(&y, &i, sizeof(float));
return y * (1.5f - (x * y * y));
}
protected:
std::vector< std::vector<T>> historic_g_sqr_;
float e;

Просмотреть файл

@ -1,5 +1,5 @@
#ifndef MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
#define MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
#ifndef MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_
#define MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_
#include "updater.h"
#include <vector>
@ -7,9 +7,9 @@
namespace multiverso {
template <typename T>
class SmoothGradientUpdater : public Updater<T> {
class MomentumUpdater : public Updater<T> {
public:
explicit SmoothGradientUpdater(size_t size) : size_(size) {
explicit MomentumUpdater(size_t size) : size_(size) {
Log::Debug("[SmoothGradientUpdater] Init with size = %d. \n", size_);
smooth_gradient_.resize(size_);
}
@ -22,7 +22,7 @@ public:
data[index + offset] -= smooth_gradient_[index + offset];
}
}
~SmoothGradientUpdater() { smooth_gradient_.clear(); }
~MomentumUpdater() { smooth_gradient_.clear(); }
protected:
std::vector<T> smooth_gradient_;
size_t size_;
@ -30,4 +30,4 @@ protected:
}
#endif // MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
#endif // MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_

Просмотреть файл

@ -1,58 +1,64 @@
#ifndef MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
#define MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
#include <cmath>
#include <vector>
#include "updater.h"
#include <cmath>
namespace multiverso {
// [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms
template <typename T>
class SecondOrderUpdater : public Updater<T> {
public:
explicit SecondOrderUpdater(size_t size) :
size_(size) {
Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_);
shadow_copies_.resize(MV_NumWorkers());
for (auto s : shadow_copies_){
s.resize(size);
}
historic_g_sqr_.resize(MV_NumWorkers());
for (auto s : historic_g_sqr_){
s.resize(size);
}
// [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms
template <typename T>
class SecondOrderUpdater : public Updater<T> {
public:
explicit SecondOrderUpdater(size_t size) :
size_(size) {
Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_);
shadow_copies_.resize(MV_NumWorkers(), std::vector<T>(size_));
historic_g_sqr_.resize(MV_NumWorkers(), std::vector<T>(size_));
}
void Update(size_t num_element, T*data, T*delta,
UpdateOption* option, size_t offset) override {
auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id());
auto copies_data_ = shadow_copies_.at(option->worker_id());
for (size_t index = 0; index < num_element; ++index) {
// gsqr = (1 - r) * g^2 + r * gsqr
g_sqr_data_[index + offset] =
(1 - option->rho()) * delta[index] * delta[index]
/ option->learning_rate() / option->learning_rate() +
option->rho() * g_sqr_data_[index + offset];
data[index + offset] -= delta[index] + option->lambda() *
std::sqrt(g_sqr_data_[index + offset]) *
(data[index + offset] - copies_data_[index + offset]);
// caching each worker's latest version of parameter
copies_data_[index + offset] = data[index + offset];
}
void Update(size_t num_element, T*data, T*delta,
UpdateOption* option, size_t offset) override {
for (size_t index = 0; index < num_element; ++index) {
// gsqr = (1 - r) * g^2 + r * gsqr
historic_g_sqr_[option->worker_id()][index + offset] =
(1 - option->rho()) * delta[index] * delta[index]
/ option->learning_rate() / option->learning_rate() +
option->rho() * historic_g_sqr_[option->worker_id()][index + offset];
}
~SecondOrderUpdater() {
shadow_copies_.clear();
historic_g_sqr_.clear();
}
data[index + offset] -= delta[index] + option->lambda() *
std::sqrt(historic_g_sqr_[option->worker_id()][index + offset]) *
(data[index + offset] - shadow_copies_[option->worker_id()][index + offset]);
private:
float QuakeRsqrt(float number) {
float x = number * 0.5f, y = number;
std::uint32_t i;
std::memcpy(&i, &y, sizeof(float));
i = 0x5f3759df - (i >> 1);
std::memcpy(&y, &i, sizeof(float));
return y * (1.5f - (x * y * y));
}
// caching each worker's latest version of parameter
shadow_copies_[option->worker_id()][index + offset] = data[index + offset];
}
}
~SecondOrderUpdater() {
shadow_copies_.clear();
historic_g_sqr_.clear();
}
protected:
std::vector< std::vector<T>> shadow_copies_;
std::vector< std::vector<T>> historic_g_sqr_;
protected:
std::vector< std::vector<T>> shadow_copies_;
std::vector< std::vector<T>> historic_g_sqr_;
// move these parameter to UpdateOption
size_t size_;
};
size_t size_;
};
} // namespace multiverso
}
#endif // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
#endif // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_

Просмотреть файл

@ -2,14 +2,21 @@
#define MULTIVERSO_UPDATER_UPDATER_H_
#include <cstring>
#include <sstream>
#include <multiverso/multiverso.h>
namespace multiverso {
struct UpdateOption {
public:
// TODO(feiga): default value;
UpdateOption() { data_[0].i = MV_WorkerId(); }
// TODO(qiwye): make these default value more flexiable
UpdateOption(){
data_[0].i = MV_WorkerId();
data_[1].f = 0.0f;
data_[2].f = 0.01f;
data_[3].f = 0.1f;
data_[4].f = 0.1f;
}
UpdateOption(const char* data, size_t size) {
CopyFrom(data, size);
@ -29,6 +36,15 @@ public:
void set_lambda(float lambda) { data_[4].f = lambda; }
std::string toString(){
std::stringstream ss;
ss << "UpdateOption " << worker_id() << " " << momentum() << " "
<< learning_rate() << " " << rho() << " " << lambda() << std::endl;
return ss.str();
}
const char* data() const { return reinterpret_cast<const char*>(&data_[0]); }
size_t size() const { return kSize * sizeof(InternalType); }
void CopyFrom(const char* data, size_t size) {

Просмотреть файл

@ -138,7 +138,7 @@ int MatrixWorkerTable<T>::Partition(const std::vector<Blob>& kv,
int rank = MV_ServerIdToRank(it.first);
std::vector<Blob>& vec = (*out)[rank];
vec.push_back(Blob(it.second * sizeof(int)));
if (kv.size() == 2) vec.push_back(Blob(it.second * row_size_));
if (kv.size() >= 2) vec.push_back(Blob(it.second * row_size_));
}
count.clear();
@ -147,7 +147,7 @@ int MatrixWorkerTable<T>::Partition(const std::vector<Blob>& kv,
int dst = dest[i];
int rank = MV_ServerIdToRank(dst);
(*out)[rank][0].As<int>(count[dst]) = keys[i];
if (kv.size() == 2){ // copy add values
if (kv.size() >= 2){ // copy add values
memcpy(&((*out)[rank][1].As<T>(count[dst] * num_col_)),
kv[1].data() + offset, row_size_);
offset += row_size_;
@ -244,7 +244,9 @@ void MatrixServerTable<T>::ProcessAdd(const std::vector<Blob>& data) {
server_id_, row_offset_, ssize / num_col_);
}
else {
CHECK(data[1].size() == keys_size * sizeof(T)* num_col_);
Log::Debug("[Debug] Server = %d, keys_size = %d, data[1].size = %d, num_col = %d\n",
server_id_, keys_size, data[1].size() , num_col_);
CHECK(data[1].size() == keys_size * num_col_);
int offset_v = 0;
CHECK(storage_.size() >= keys_size * num_col_);

Просмотреть файл

@ -1,7 +1,7 @@
#include "multiverso/updater/updater.h"
#include "multiverso/updater/adagrad_updater.h"
#include "multiverso/updater/smooth_gradient_updater.h"
#include "multiverso/updater/momentum_updater.h"
#include "multiverso/updater/second_order_gradient_updater.h"
#include "multiverso/updater/sgd_updater.h"
#include "multiverso/util/configure.h"
@ -32,11 +32,10 @@ Updater<int>* Updater<int>::GetUpdater(size_t) {
template <typename T>
Updater<T>* Updater<T>::GetUpdater(size_t size) {
std::string type = MV_CONFIG_updater_type;
printf(type.c_str());
if (type == "sgd") return new SGDUpdater<T>(size);
if (type == "adagrad") return new AdaGradUpdater<T>(size);
if (type == "smooth_gradient") return new SmoothGradientUpdater<T>(size);
if (type == "second_order") return new SecondOrderUpdater<T>(size);
if (type == "momentum_sgd") return new MomentumUpdater<T>(size);
if (type == "second_order_sgd") return new SecondOrderUpdater<T>(size);
// Default: simple updater
return new Updater<T>();
}