diff --git a/next/IMultiverso.vcxproj b/next/IMultiverso.vcxproj
index f9345e6..50beb2d 100644
--- a/next/IMultiverso.vcxproj
+++ b/next/IMultiverso.vcxproj
@@ -192,7 +192,7 @@
     <ClInclude Include="include\multiverso\updater\adagrad_updater.h" />
     <ClInclude Include="include\multiverso\updater\sgd_updater.h" />
     <ClInclude Include="include\multiverso\updater\second_order_gradient_updater.h" />
-    <ClInclude Include="include\multiverso\updater\smooth_gradient_updater.h" />
+    <ClInclude Include="include\multiverso\updater\momentum_updater.h" />
     <ClInclude Include="include\multiverso\updater\updater.h" />
     <ClInclude Include="include\multiverso\util\configure.h" />
     <ClInclude Include="include\multiverso\util\async_buffer.h" />
diff --git a/next/IMultiverso.vcxproj.filters b/next/IMultiverso.vcxproj.filters
index 9a78ae8..206782b 100644
--- a/next/IMultiverso.vcxproj.filters
+++ b/next/IMultiverso.vcxproj.filters
@@ -100,9 +100,6 @@
     <ClInclude Include="include\multiverso\updater\updater.h">
       <Filter>updater</Filter>
     </ClInclude>
-    <ClInclude Include="include\multiverso\updater\smooth_gradient_updater.h">
-      <Filter>updater</Filter>
-    </ClInclude>
     <ClInclude Include="include\multiverso\updater\adagrad_updater.h">
       <Filter>updater</Filter>
     </ClInclude>
@@ -115,6 +112,9 @@
     <ClInclude Include="include\multiverso\updater\sgd_updater.h">
       <Filter>updater</Filter>
     </ClInclude>
+    <ClInclude Include="include\multiverso\updater\momentum_updater.h">
+      <Filter>updater</Filter>
+    </ClInclude>
   </ItemGroup>
   <ItemGroup>
     <Filter Include="include">
diff --git a/next/Test/main.cpp b/next/Test/main.cpp
index 90b00bb..2f2aafa 100644
--- a/next/Test/main.cpp
+++ b/next/Test/main.cpp
@@ -102,6 +102,7 @@ void TestArray(int argc, char* argv[]) {
     UpdateOption option;
     option.set_learning_rate(1 - 0.0001 * i);
     option.set_momentum(0.99);
+    option.set_rho(0.01f);
     shared_array->Add(delta.data(), 1000000, &option);
 
 
@@ -343,8 +344,8 @@ void TestMatrix(int argc, char* argv[]){
   //  Log::Debug("rank %d has no worker\n", MV_Rank());
   // }
 
-	MatrixWorkerTable<int>* worker_table = new MatrixWorkerTable<int>(num_row, num_col);
-	MatrixServerTable<int>* server_table = new MatrixServerTable<int>(num_row, num_col);
+	MatrixWorkerTable<float>* worker_table = new MatrixWorkerTable<float>(num_row, num_col);
+	MatrixServerTable<float>* server_table = new MatrixServerTable<float>(num_row, num_col);
 	std::thread* m_prefetchThread = nullptr;
 	MV_Barrier();
 
@@ -359,21 +360,22 @@ void TestMatrix(int argc, char* argv[]){
 		std::vector<int> v = { 0, 1, 5, 10 };
 
 		// test data
-		std::vector<int> delta(size);
+		std::vector<float> delta(size);
 		for (int i = 0; i < size; ++i)
 			delta[i] = i;
 
-		int * data = new int[size];
+		float * data = new float[size];
 		m_prefetchThread = new std::thread([&](){
 
-			worker_table->Add(delta.data(), size); //add all
+      UpdateOption option;
+			worker_table->Add(delta.data(), size, &option); //add all
 
 			worker_table->Get(data, size); //get all
 			printf("----------------------------\n");
 			for (int i = 0; i < num_row; ++i){
 				printf("rank %d, row %d: ", MV_Rank(), i);
 				for (int j = 0; j < num_col; ++j)
-					printf("%d ", data[i * num_col + j]);
+					printf("%.2f ", data[i * num_col + j]);
 				printf("\n");
 			};
 		});
@@ -386,9 +388,10 @@ void TestMatrix(int argc, char* argv[]){
 			m_prefetchThread = nullptr;
 		}
 		//test data_vec
-		std::vector<int*> data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] };
-		std::vector<int*> delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] };
-		worker_table->Add(v, delta_rows, num_col);
+		std::vector<float*> data_rows = { &data[0], &data[num_col], &data[5 * num_col], &data[10 * num_col] };
+		std::vector<float*> delta_rows = { &delta[0], &delta[num_col], &delta[5 * num_col], &delta[10 * num_col] };
+    UpdateOption option;
+		worker_table->Add(v, delta_rows, num_col, &option);
 		worker_table->Get(v, data_rows, num_col);
 		MV_Barrier();
 
@@ -396,7 +399,7 @@ void TestMatrix(int argc, char* argv[]){
 		for (int i = 0; i < num_row; ++i){
 			printf("rank %d, row %d: ", MV_Rank(), i);
 			for (int j = 0; j < num_col; ++j)
-				printf("%d ", data[i * num_col + j]);
+				printf("%.2f ", data[i * num_col + j]);
 			printf("\n");
 		}
 		MV_Barrier();
diff --git a/next/include/multiverso/updater/adagrad_updater.h b/next/include/multiverso/updater/adagrad_updater.h
index 3ae5029..77ed15d 100644
--- a/next/include/multiverso/updater/adagrad_updater.h
+++ b/next/include/multiverso/updater/adagrad_updater.h
@@ -5,6 +5,8 @@
 
 #include <vector>
 #include <cmath>
+#include <cstdint>
+
 
 namespace multiverso {
 
@@ -13,25 +15,41 @@ class AdaGradUpdater : public Updater<T> {
 public:
   explicit AdaGradUpdater(size_t size):
     e(1e-6f), size_(size) {  
-    historic_g_sqr_.resize(MV_NumWorkers());
-    for (auto s : historic_g_sqr_){
-      s.resize(size_);
-    }
-    Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. \n", size_, e);
+    historic_g_sqr_.resize(MV_NumWorkers(), std::vector<T>(size_));
+    Log::Debug("[AdaGradUpdater] Init with size = %d, e = %f. historic_size = %d\n", size_, e, historic_g_sqr_.size());
   }
+
   void Update(size_t num_element, T* data, T* delta, 
               UpdateOption* option, size_t offset) override {
+    auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id());
     for (size_t index = 0; index < num_element; ++index) {
-
-      historic_g_sqr_[option->worker_id()][index + offset] -=
+      g_sqr_data_[index + offset] -=
         delta[index] * delta[index] / option->learning_rate() / 
         option->learning_rate();
 
+      //[TODO(qiwye)] sqrt take too much time
       data[index + offset] -= option->rho() /
-        std::sqrt(historic_g_sqr_[option->worker_id()][index + offset] + e) *
+        std::sqrt(g_sqr_data_[index + offset] + e) *
         delta[index] / option->learning_rate();
+
+      //data[index + offset] -= option->rho() *
+      //  QuakeRsqrt(g_sqr_data_[index + offset] + e) *
+      //  delta[index] / option->learning_rate();
     }
   }
+
+
+private:
+
+  float QuakeRsqrt(float number){
+    float x = number * 0.5f, y = number;
+    std::uint32_t i;
+    std::memcpy(&i, &y, sizeof(float));
+    i = 0x5f3759df - (i >> 1);
+    std::memcpy(&y, &i, sizeof(float));
+    return y * (1.5f - (x * y * y));
+  }
+
 protected:
     std::vector< std::vector<T>> historic_g_sqr_;
     float e;
diff --git a/next/include/multiverso/updater/smooth_gradient_updater.h b/next/include/multiverso/updater/momentum_updater.h
similarity index 67%
rename from next/include/multiverso/updater/smooth_gradient_updater.h
rename to next/include/multiverso/updater/momentum_updater.h
index 71b2e42..68470c4 100644
--- a/next/include/multiverso/updater/smooth_gradient_updater.h
+++ b/next/include/multiverso/updater/momentum_updater.h
@@ -1,5 +1,5 @@
-#ifndef MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
-#define MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
+#ifndef MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_
+#define MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_
 
 #include "updater.h"
 #include <vector>
@@ -7,9 +7,9 @@
 namespace multiverso {
 
 template <typename T>
-class SmoothGradientUpdater : public Updater<T> {
+class MomentumUpdater : public Updater<T> {
 public:
-  explicit SmoothGradientUpdater(size_t size) : size_(size) {
+  explicit MomentumUpdater(size_t size) : size_(size) {
     Log::Debug("[SmoothGradientUpdater] Init with size = %d. \n", size_);
     smooth_gradient_.resize(size_);
   }
@@ -22,7 +22,7 @@ public:
       data[index + offset] -= smooth_gradient_[index + offset];
     }
   }
-  ~SmoothGradientUpdater() { smooth_gradient_.clear(); }
+  ~MomentumUpdater() { smooth_gradient_.clear(); }
 protected:
   std::vector<T> smooth_gradient_;
   size_t size_;
@@ -30,4 +30,4 @@ protected:
 
 }
 
-#endif // MULTIVERSO_UPDATER_SMOOTH_GRADIENT_UPDATER_H_
\ No newline at end of file
+#endif // MULTIVERSO_UPDATER_MOMENTUM_UPDATER_H_
\ No newline at end of file
diff --git a/next/include/multiverso/updater/second_order_gradient_updater.h b/next/include/multiverso/updater/second_order_gradient_updater.h
index 0b1786e..31b373c 100644
--- a/next/include/multiverso/updater/second_order_gradient_updater.h
+++ b/next/include/multiverso/updater/second_order_gradient_updater.h
@@ -1,58 +1,64 @@
 #ifndef MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
 #define MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
 
+#include <cmath>
+#include <vector>
+
 #include "updater.h"
 
-#include <cmath>
 
 namespace multiverso {
 
-  // [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms
-  template <typename T>
-  class SecondOrderUpdater : public Updater<T> {
-  public:
-    explicit SecondOrderUpdater(size_t size) : 
-      size_(size) {
-      Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_);
-      shadow_copies_.resize(MV_NumWorkers());
-      for (auto s : shadow_copies_){
-        s.resize(size);
-      }
-      historic_g_sqr_.resize(MV_NumWorkers());
-      for (auto s : historic_g_sqr_){
-        s.resize(size);
-      }
+// [TODO(qiwye)]:rename the class to Shuxin Zheng's algorithms
+template <typename T>
+class SecondOrderUpdater : public Updater<T> {
+public:
+  explicit SecondOrderUpdater(size_t size) :
+    size_(size) {
+    Log::Debug("[SecondOrderUpdater] Init with size = %d. \n", size_);
+    shadow_copies_.resize(MV_NumWorkers(), std::vector<T>(size_));
+    historic_g_sqr_.resize(MV_NumWorkers(), std::vector<T>(size_));
+  }
+  void Update(size_t num_element, T*data, T*delta,
+    UpdateOption* option, size_t offset) override {
+    auto g_sqr_data_ = historic_g_sqr_.at(option->worker_id());
+    auto copies_data_ = shadow_copies_.at(option->worker_id());
+    for (size_t index = 0; index < num_element; ++index) {
+      // gsqr = (1 - r) * g^2 + r * gsqr
+      g_sqr_data_[index + offset] =
+        (1 - option->rho()) * delta[index] * delta[index]
+        / option->learning_rate() / option->learning_rate() +
+        option->rho() * g_sqr_data_[index + offset];
+
+      data[index + offset] -= delta[index] + option->lambda() *
+        std::sqrt(g_sqr_data_[index + offset]) *
+        (data[index + offset] - copies_data_[index + offset]);
+
+      // caching each worker's latest version of parameter
+      copies_data_[index + offset] = data[index + offset];
     }
-    void Update(size_t num_element, T*data, T*delta,
-      UpdateOption* option, size_t offset) override {
-      
-      for (size_t index = 0; index < num_element; ++index) {
-        // gsqr = (1 - r) * g^2 + r * gsqr  
-        historic_g_sqr_[option->worker_id()][index + offset] = 
-          (1 - option->rho()) * delta[index] * delta[index] 
-          / option->learning_rate() / option->learning_rate() +
-          option->rho() * historic_g_sqr_[option->worker_id()][index + offset];
+  }
+  ~SecondOrderUpdater() {
+    shadow_copies_.clear();
+    historic_g_sqr_.clear();
+  }
 
-        data[index + offset] -= delta[index] + option->lambda() * 
-          std::sqrt(historic_g_sqr_[option->worker_id()][index + offset]) *
-          (data[index + offset] - shadow_copies_[option->worker_id()][index + offset]);
+private:
+  float QuakeRsqrt(float number) {
+    float x = number * 0.5f, y = number;
+    std::uint32_t i;
+    std::memcpy(&i, &y, sizeof(float));
+    i = 0x5f3759df - (i >> 1);
+    std::memcpy(&y, &i, sizeof(float));
+    return y * (1.5f - (x * y * y));
+  }
 
-        // caching each worker's latest version of parameter
-        shadow_copies_[option->worker_id()][index + offset] = data[index + offset];
-      }
-    }
-    ~SecondOrderUpdater() { 
-      shadow_copies_.clear();
-      historic_g_sqr_.clear();
-    }
-  protected:
-    std::vector< std::vector<T>> shadow_copies_;
-    std::vector< std::vector<T>> historic_g_sqr_;
+protected:
+  std::vector< std::vector<T>> shadow_copies_;
+  std::vector< std::vector<T>> historic_g_sqr_;
 
-    // move these parameter to UpdateOption
-    size_t size_;
-  };
+  size_t size_;
+};
+}  // namespace multiverso
 
-}
-
-#endif // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
\ No newline at end of file
+#endif  // MULTIVERSO_UPDATER_SECOND_ORDER_GRADIENT_UPDATER_H_
diff --git a/next/include/multiverso/updater/updater.h b/next/include/multiverso/updater/updater.h
index 93eea3a..9ec54a9 100644
--- a/next/include/multiverso/updater/updater.h
+++ b/next/include/multiverso/updater/updater.h
@@ -2,14 +2,21 @@
 #define MULTIVERSO_UPDATER_UPDATER_H_
 
 #include <cstring>
+#include <sstream>
 #include <multiverso/multiverso.h>
 
 namespace multiverso {
 
 struct UpdateOption {
 public:
-  // TODO(feiga): default value;
-  UpdateOption() { data_[0].i = MV_WorkerId(); }
+  // TODO(qiwye): make these default value more flexiable
+  UpdateOption(){
+    data_[0].i = MV_WorkerId(); 
+    data_[1].f = 0.0f;
+    data_[2].f = 0.01f;
+    data_[3].f = 0.1f;
+    data_[4].f = 0.1f;
+  }
 
   UpdateOption(const char* data, size_t size) {
     CopyFrom(data, size);
@@ -29,6 +36,15 @@ public:
   void set_lambda(float lambda) { data_[4].f = lambda; }
 
 
+  std::string toString(){
+    std::stringstream  ss;
+    ss << "UpdateOption " << worker_id() << " " << momentum() << " "
+      << learning_rate() << " " << rho() << " " << lambda() << std::endl;
+
+    return ss.str();
+  }
+
+
   const char* data() const { return reinterpret_cast<const char*>(&data_[0]); }
   size_t size() const { return kSize * sizeof(InternalType); }
   void CopyFrom(const char* data, size_t size) { 
diff --git a/next/src/table/matrix_table.cpp b/next/src/table/matrix_table.cpp
index 6941bea..e35a79e 100644
--- a/next/src/table/matrix_table.cpp
+++ b/next/src/table/matrix_table.cpp
@@ -138,7 +138,7 @@ int MatrixWorkerTable<T>::Partition(const std::vector<Blob>& kv,
     int rank = MV_ServerIdToRank(it.first);
     std::vector<Blob>& vec = (*out)[rank];
     vec.push_back(Blob(it.second * sizeof(int)));
-    if (kv.size() == 2) vec.push_back(Blob(it.second * row_size_));
+    if (kv.size() >= 2) vec.push_back(Blob(it.second * row_size_));
   }
   count.clear();
 
@@ -147,7 +147,7 @@ int MatrixWorkerTable<T>::Partition(const std::vector<Blob>& kv,
     int dst = dest[i];
     int rank = MV_ServerIdToRank(dst);
     (*out)[rank][0].As<int>(count[dst]) = keys[i];
-    if (kv.size() == 2){ // copy add values
+    if (kv.size() >= 2){ // copy add values
       memcpy(&((*out)[rank][1].As<T>(count[dst] * num_col_)),
         kv[1].data() + offset, row_size_);
       offset += row_size_;
@@ -244,7 +244,9 @@ void MatrixServerTable<T>::ProcessAdd(const std::vector<Blob>& data) {
       server_id_, row_offset_, ssize / num_col_);
   }
   else {
-    CHECK(data[1].size() == keys_size * sizeof(T)* num_col_);
+    Log::Debug("[Debug] Server = %d, keys_size = %d, data[1].size = %d, num_col = %d\n",
+      server_id_, keys_size, data[1].size() , num_col_);
+    CHECK(data[1].size() == keys_size * num_col_);
 
     int offset_v = 0;
     CHECK(storage_.size() >= keys_size * num_col_);
diff --git a/next/src/updater/updater.cpp b/next/src/updater/updater.cpp
index 2665389..c9d3a00 100644
--- a/next/src/updater/updater.cpp
+++ b/next/src/updater/updater.cpp
@@ -1,7 +1,7 @@
 #include "multiverso/updater/updater.h"
 
 #include "multiverso/updater/adagrad_updater.h"
-#include "multiverso/updater/smooth_gradient_updater.h"
+#include "multiverso/updater/momentum_updater.h"
 #include "multiverso/updater/second_order_gradient_updater.h"
 #include "multiverso/updater/sgd_updater.h"
 #include "multiverso/util/configure.h"
@@ -32,11 +32,10 @@ Updater<int>* Updater<int>::GetUpdater(size_t) {
 template <typename T>
 Updater<T>* Updater<T>::GetUpdater(size_t size) {
   std::string type = MV_CONFIG_updater_type;
-  printf(type.c_str());
   if (type == "sgd") return new SGDUpdater<T>(size);
   if (type == "adagrad") return new AdaGradUpdater<T>(size);
-  if (type == "smooth_gradient") return new SmoothGradientUpdater<T>(size);
-  if (type == "second_order") return new SecondOrderUpdater<T>(size);
+  if (type == "momentum_sgd") return new MomentumUpdater<T>(size);
+  if (type == "second_order_sgd") return new SecondOrderUpdater<T>(size);
   // Default: simple updater
   return new Updater<T>();
 }