added explanation notes to syncedmem

2013-11-19 09:08:31 -08:00 · 2013-11-19 09:08:31 -08:00 · 46a431ac74
--- a/include/caffe/syncedmem.hpp
+++ b/include/caffe/syncedmem.hpp
@ -9,32 +9,19 @@

 namespace caffe {

-
-#if 0
-
-// This chunk of code should be used when one has a machine that does not have
-// GPU, thus cannot be used if we just want to distribute a single binary.
-
-inline void CaffeMallocHost(void** ptr, size_t size) {
-  if (Caffe::mode() == Caffe::GPU) {
-    CUDA_CHECK(cudaMallocHost(ptr, size));
-  } else {
-    *ptr = malloc(size);
-  }
-}
-
-inline void CaffeFreeHost(void* ptr) {
-  if (Caffe::mode() == Caffe::GPU) {
-    CUDA_CHECK(cudaFreeHost(ptr));
-  } else {
-    free(ptr);
-  }
-}
-
-#else
-
-// This chunk of code is safer, but may not be as fast as the cuda pinned memory
-// version.
+// Theoretically, CaffeMallocHost and CaffeFreeHost should simply call the
+// cudaMallocHost and cudaFreeHost functions to manage pinned memory.
+// However, those calls rely on the existence of a CUDA GPU (I don't know
+// why that is a must since allocating memory should not be accessing the
+// GPU resource, but it just creates an error as of CUDA 5.0) and will cause
+// problems when running on a machine without a GPU. Thus, we simply define
+// these two functions for safety and possible future change if the problem
+// of calling CUDA functions disappears in a future version.
+// 
+// In practice, although we are creating unpinned memory here, as long as we
+// are constantly accessing it the memory pages almost always stay in
+// physical memory (assuming we have large enough memory installed), so this
+// does not seem to create a memory bottleneck here.

 inline void CaffeMallocHost(void** ptr, size_t size) {
  *ptr = malloc(size);
@ -44,7 +31,6 @@ inline void CaffeFreeHost(void* ptr) {
  free(ptr);
 }

-#endif  // code to define CaffeMallocHost and CaffeFreeHost

 class SyncedMemory {
 public: