diff --git a/Makefile b/Makefile
index 546de931..ab570882 100644
--- a/Makefile
+++ b/Makefile
@@ -253,10 +253,17 @@ endif
 # Debugging
 ifeq ($(DEBUG), 1)
 	COMMON_FLAGS += -DDEBUG -g -O0
+	NVCCFLAGS += -G
 else
 	COMMON_FLAGS += -DNDEBUG -O2
 endif
 
+# cuDNN acceleration configuration.
+ifeq ($(USE_CUDNN), 1)
+	LIBRARIES += cudnn
+	COMMON_FLAGS += -DUSE_CUDNN
+endif
+
 # CPU-only configuration
 ifeq ($(CPU_ONLY), 1)
 	OBJS := $(PROTO_OBJS) $(CXX_OBJS)
@@ -299,7 +306,7 @@ LIBRARY_DIRS += $(BLAS_LIB)
 # Complete build flags.
 COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
 CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
-NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
+NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
 # mex may invoke an older gcc that is too liberal with -Wuninitalized
 MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
 LINKFLAGS += -fPIC $(COMMON_FLAGS) $(WARNINGS)
diff --git a/Makefile.config.example b/Makefile.config.example
index 7c96d8a9..5cb0b243 100644
--- a/Makefile.config.example
+++ b/Makefile.config.example
@@ -1,6 +1,9 @@
 ## Refer to http://caffe.berkeleyvision.org/installation.html
 # Contributions simplifying and improving our build system are welcome!
 
+# cuDNN acceleration switch (uncomment to build with cuDNN).
+# USE_CUDNN := 1
+
 # CPU-only switch (uncomment to build without GPU support).
 # CPU_ONLY := 1
 
diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp
new file mode 100644
index 00000000..1fe72f1e
--- /dev/null
+++ b/include/caffe/util/cudnn.hpp
@@ -0,0 +1,92 @@
+#ifndef CAFFE_UTIL_CUDNN_H_
+#define CAFFE_UTIL_CUDNN_H_
+#ifdef USE_CUDNN
+
+#include <cudnn.h>
+
+#include "caffe/proto/caffe.pb.h"
+
+#define CUDNN_CHECK(condition) \
+  do { \
+    cudnnStatus_t status = condition; \
+    CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " cuDNN error."; \
+  } while (0)
+
+namespace caffe {
+
+// TODO(cudnn): check existence, add to CUDNN_CHECK
+// const char* cudnnGetErrorString(curandStatus_t error);
+//
+namespace cudnn {
+
+template <typename Dtype> class dataType;
+template<> class dataType<float> {
+ public:
+  static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
+};
+template<> class dataType<double> {
+ public:
+  static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
+};
+
+template <typename Dtype>
+inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
+    int n, int c, int h, int w,
+    int stride_n, int stride_c, int stride_h, int stride_w) {
+  CUDNN_CHECK(cudnnCreateTensor4dDescriptor(desc));
+  CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
+      n, c, h, w, stride_n, stride_c, stride_h, stride_w));
+}
+
+template <typename Dtype>
+inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
+    int n, int c, int h, int w) {
+  const int stride_w = 1;
+  const int stride_h = w * stride_w;
+  const int stride_c = h * stride_h;
+  const int stride_n = c * stride_c;
+  createTensor4dDesc<Dtype>(desc, n, c, h, w,
+      stride_n, stride_c, stride_h, stride_w);
+}
+
+template <typename Dtype>
+inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
+    int n, int c, int h, int w) {
+  CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
+  CUDNN_CHECK(cudnnSetFilterDescriptor(*desc, dataType<Dtype>::type,
+      n, c, h, w));
+}
+
+template <typename Dtype>
+inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
+    cudnnTensor4dDescriptor_t bottom, cudnnFilterDescriptor_t filter,
+    int pad_h, int pad_w, int stride_h, int stride_w) {
+  CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
+  CUDNN_CHECK(cudnnSetConvolutionDescriptor(*conv, bottom, filter,
+      pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
+}
+
+template <typename Dtype>
+inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
+    PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
+    int h, int w, int stride_h, int stride_w) {
+  switch (poolmethod) {
+  case PoolingParameter_PoolMethod_MAX:
+    *mode = CUDNN_POOLING_MAX;
+    break;
+  case PoolingParameter_PoolMethod_AVE:
+    *mode = CUDNN_POOLING_AVERAGE;
+    break;
+  default:
+    LOG(FATAL) << "Unknown pooling method.";
+  }
+  CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
+  CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w,
+      stride_h, stride_w));
+}
+
+}  // namespace cudnn
+}  // namespace caffe
+
+#endif  // USE_CUDNN
+#endif  // CAFFE_UTIL_CUDNN_H_
diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp
index bb3ac616..3df28a49 100644
--- a/include/caffe/util/device_alternate.hpp
+++ b/include/caffe/util/device_alternate.hpp
@@ -36,6 +36,9 @@ void classname::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
 #include <cuda_runtime.h>
 #include <curand.h>
 #include <driver_types.h>  // cuda driver types
+#ifdef USE_CUDNN  // cuDNN acceleration library.
+#include "caffe/util/cudnn.hpp"
+#endif
 
 //
 // CUDA macros
diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp
index 09b0f900..b7b10988 100644
--- a/src/caffe/layer_factory.cpp
+++ b/src/caffe/layer_factory.cpp
@@ -16,6 +16,9 @@ ConvolutionLayer<Dtype>* GetConvolutionLayer(const string& name,
   ConvolutionParameter_Engine engine = param.convolution_param().engine();
   if (engine == ConvolutionParameter_Engine_DEFAULT) {
     engine = ConvolutionParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = ConvolutionParameter_Engine_CUDNN;
+#endif
   }
   if (engine == ConvolutionParameter_Engine_CAFFE) {
     return new ConvolutionLayer<Dtype>(param);
@@ -36,6 +39,9 @@ PoolingLayer<Dtype>* GetPoolingLayer(const string& name,
   PoolingParameter_Engine engine = param.pooling_param().engine();
   if (engine == PoolingParameter_Engine_DEFAULT) {
     engine = PoolingParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = PoolingParameter_Engine_CUDNN;
+#endif
   }
   if (engine == PoolingParameter_Engine_CAFFE) {
     return new PoolingLayer<Dtype>(param);
@@ -56,6 +62,9 @@ ReLULayer<Dtype>* GetReLULayer(const string& name,
   ReLUParameter_Engine engine = param.relu_param().engine();
   if (engine == ReLUParameter_Engine_DEFAULT) {
     engine = ReLUParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = ReLUParameter_Engine_CUDNN;
+#endif
   }
   if (engine == ReLUParameter_Engine_CAFFE) {
     return new ReLULayer<Dtype>(param);
@@ -76,6 +85,9 @@ SigmoidLayer<Dtype>* GetSigmoidLayer(const string& name,
   SigmoidParameter_Engine engine = param.sigmoid_param().engine();
   if (engine == SigmoidParameter_Engine_DEFAULT) {
     engine = SigmoidParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = SigmoidParameter_Engine_CUDNN;
+#endif
   }
   if (engine == SigmoidParameter_Engine_CAFFE) {
     return new SigmoidLayer<Dtype>(param);
@@ -96,6 +108,9 @@ TanHLayer<Dtype>* GetTanHLayer(const string& name,
   TanHParameter_Engine engine = param.tanh_param().engine();
   if (engine == TanHParameter_Engine_DEFAULT) {
     engine = TanHParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = TanHParameter_Engine_CUDNN;
+#endif
   }
   if (engine == TanHParameter_Engine_CAFFE) {
     return new TanHLayer<Dtype>(param);
@@ -116,6 +131,9 @@ SoftmaxLayer<Dtype>* GetSoftmaxLayer(const string& name,
   SoftmaxParameter_Engine engine = param.softmax_param().engine();
   if (engine == SoftmaxParameter_Engine_DEFAULT) {
     engine = SoftmaxParameter_Engine_CAFFE;
+#ifdef USE_CUDNN
+    engine = SoftmaxParameter_Engine_CUDNN;
+#endif
   }
   if (engine == SoftmaxParameter_Engine_CAFFE) {
     return new SoftmaxLayer<Dtype>(param);
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 082feb83..8cb82ceb 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
@@ -388,6 +388,7 @@ message ConvolutionParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 15 [default = DEFAULT];
 }
@@ -579,6 +580,7 @@ message PoolingParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 11 [default = DEFAULT];
 }
@@ -602,6 +604,7 @@ message ReLUParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 2 [default = DEFAULT];
 }
@@ -611,6 +614,7 @@ message SigmoidParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 1 [default = DEFAULT];
 }
@@ -630,6 +634,7 @@ message SoftmaxParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 1 [default = DEFAULT];
 }
@@ -639,6 +644,7 @@ message TanHParameter {
   enum Engine {
     DEFAULT = 0;
     CAFFE = 1;
+    CUDNN = 2;
   }
   optional Engine engine = 1 [default = DEFAULT];
 }