Mirror of https://github.com/microsoft/caffe.git
add cuDNN to build
This commit is contained in:
Parent
3cf3df829e
Commit
77d91242cd
9
Makefile
9
Makefile
|
@ -253,10 +253,17 @@ endif
|
|||
# Debugging
|
||||
ifeq ($(DEBUG), 1)
|
||||
COMMON_FLAGS += -DDEBUG -g -O0
|
||||
NVCCFLAGS += -G
|
||||
else
|
||||
COMMON_FLAGS += -DNDEBUG -O2
|
||||
endif
|
||||
|
||||
# cuDNN acceleration configuration.
|
||||
ifeq ($(USE_CUDNN), 1)
|
||||
LIBRARIES += cudnn
|
||||
COMMON_FLAGS += -DUSE_CUDNN
|
||||
endif
|
||||
|
||||
# CPU-only configuration
|
||||
ifeq ($(CPU_ONLY), 1)
|
||||
OBJS := $(PROTO_OBJS) $(CXX_OBJS)
|
||||
|
@ -299,7 +306,7 @@ LIBRARY_DIRS += $(BLAS_LIB)
|
|||
# Complete build flags.
|
||||
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
|
||||
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
|
||||
NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
|
||||
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
|
||||
# mex may invoke an older gcc that is too liberal with -Wuninitalized
|
||||
MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
|
||||
LINKFLAGS += -fPIC $(COMMON_FLAGS) $(WARNINGS)
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!

# cuDNN acceleration switch (uncomment to build with cuDNN).
# USE_CUDNN := 1

# CPU-only switch (uncomment to build without GPU support).
# CPU_ONLY := 1
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
#ifndef CAFFE_UTIL_CUDNN_H_
|
||||
#define CAFFE_UTIL_CUDNN_H_
|
||||
#ifdef USE_CUDNN
|
||||
|
||||
#include <cudnn.h>
|
||||
|
||||
#include "caffe/proto/caffe.pb.h"
|
||||
|
||||
#define CUDNN_CHECK(condition) \
|
||||
do { \
|
||||
cudnnStatus_t status = condition; \
|
||||
CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " cuDNN error."; \
|
||||
} while (0)
|
||||
|
||||
namespace caffe {
|
||||
|
||||
// TODO(cudnn): check existence, add to CUDN_CHECK
|
||||
// const char* cudnnGetErrorString(curandStatus_t error);
|
||||
//
|
||||
namespace cudnn {
|
||||
|
||||
template <typename Dtype> class dataType;
|
||||
template<> class dataType<float> {
|
||||
public:
|
||||
static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
|
||||
};
|
||||
template<> class dataType<double> {
|
||||
public:
|
||||
static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
|
||||
};
|
||||
|
||||
template <typename Dtype>
|
||||
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
|
||||
int n, int c, int h, int w,
|
||||
int stride_n, int stride_c, int stride_h, int stride_w) {
|
||||
CUDNN_CHECK(cudnnCreateTensor4dDescriptor(desc));
|
||||
CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
|
||||
n, c, h, w, stride_n, stride_c, stride_h, stride_w));
|
||||
}
|
||||
|
||||
template <typename Dtype>
|
||||
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
|
||||
int n, int c, int h, int w) {
|
||||
const int stride_w = 1;
|
||||
const int stride_h = w * stride_w;
|
||||
const int stride_c = h * stride_h;
|
||||
const int stride_n = c * stride_c;
|
||||
createTensor4dDesc<Dtype>(desc, n, c, h, w,
|
||||
stride_n, stride_c, stride_h, stride_w);
|
||||
}
|
||||
|
||||
template <typename Dtype>
|
||||
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
|
||||
int n, int c, int h, int w) {
|
||||
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
|
||||
CUDNN_CHECK(cudnnSetFilterDescriptor(*desc, dataType<Dtype>::type,
|
||||
n, c, h, w));
|
||||
}
|
||||
|
||||
template <typename Dtype>
|
||||
inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
|
||||
cudnnTensor4dDescriptor_t bottom, cudnnFilterDescriptor_t filter,
|
||||
int pad_h, int pad_w, int stride_h, int stride_w) {
|
||||
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
|
||||
CUDNN_CHECK(cudnnSetConvolutionDescriptor(*conv, bottom, filter,
|
||||
pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
|
||||
}
|
||||
|
||||
template <typename Dtype>
|
||||
inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
|
||||
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
|
||||
int h, int w, int stride_h, int stride_w) {
|
||||
switch (poolmethod) {
|
||||
case PoolingParameter_PoolMethod_MAX:
|
||||
*mode = CUDNN_POOLING_MAX;
|
||||
break;
|
||||
case PoolingParameter_PoolMethod_AVE:
|
||||
*mode = CUDNN_POOLING_AVERAGE;
|
||||
break;
|
||||
default:
|
||||
LOG(FATAL) << "Unknown pooling method.";
|
||||
}
|
||||
CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
|
||||
CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w,
|
||||
stride_h, stride_w));
|
||||
}
|
||||
|
||||
} // namespace cudnn
|
||||
} // namespace caffe
|
||||
|
||||
#endif // USE_CUDNN
|
||||
#endif // CAFFE_UTIL_CUDNN_H_
|
|
@ -36,6 +36,9 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
|
|||
#include <cuda_runtime.h>
|
||||
#include <curand.h>
|
||||
#include <driver_types.h> // cuda driver types
|
||||
#ifdef USE_CUDNN // cuDNN acceleration library.
|
||||
#include "caffe/util/cudnn.hpp"
|
||||
#endif
|
||||
|
||||
//
|
||||
// CUDA macros
|
||||
|
|
|
@ -16,6 +16,9 @@ ConvolutionLayer<Dtype>* GetConvolutionLayer(const string& name,
|
|||
ConvolutionParameter_Engine engine = param.convolution_param().engine();
|
||||
if (engine == ConvolutionParameter_Engine_DEFAULT) {
|
||||
engine = ConvolutionParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = ConvolutionParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == ConvolutionParameter_Engine_CAFFE) {
|
||||
return new ConvolutionLayer<Dtype>(param);
|
||||
|
@ -36,6 +39,9 @@ PoolingLayer<Dtype>* GetPoolingLayer(const string& name,
|
|||
PoolingParameter_Engine engine = param.pooling_param().engine();
|
||||
if (engine == PoolingParameter_Engine_DEFAULT) {
|
||||
engine = PoolingParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = PoolingParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == PoolingParameter_Engine_CAFFE) {
|
||||
return new PoolingLayer<Dtype>(param);
|
||||
|
@ -56,6 +62,9 @@ ReLULayer<Dtype>* GetReLULayer(const string& name,
|
|||
ReLUParameter_Engine engine = param.relu_param().engine();
|
||||
if (engine == ReLUParameter_Engine_DEFAULT) {
|
||||
engine = ReLUParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = ReLUParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == ReLUParameter_Engine_CAFFE) {
|
||||
return new ReLULayer<Dtype>(param);
|
||||
|
@ -76,6 +85,9 @@ SigmoidLayer<Dtype>* GetSigmoidLayer(const string& name,
|
|||
SigmoidParameter_Engine engine = param.sigmoid_param().engine();
|
||||
if (engine == SigmoidParameter_Engine_DEFAULT) {
|
||||
engine = SigmoidParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = SigmoidParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == SigmoidParameter_Engine_CAFFE) {
|
||||
return new SigmoidLayer<Dtype>(param);
|
||||
|
@ -96,6 +108,9 @@ TanHLayer<Dtype>* GetTanHLayer(const string& name,
|
|||
TanHParameter_Engine engine = param.tanh_param().engine();
|
||||
if (engine == TanHParameter_Engine_DEFAULT) {
|
||||
engine = TanHParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = TanHParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == TanHParameter_Engine_CAFFE) {
|
||||
return new TanHLayer<Dtype>(param);
|
||||
|
@ -116,6 +131,9 @@ SoftmaxLayer<Dtype>* GetSoftmaxLayer(const string& name,
|
|||
SoftmaxParameter_Engine engine = param.softmax_param().engine();
|
||||
if (engine == SoftmaxParameter_Engine_DEFAULT) {
|
||||
engine = SoftmaxParameter_Engine_CAFFE;
|
||||
#ifdef USE_CUDNN
|
||||
engine = SoftmaxParameter_Engine_CUDNN;
|
||||
#endif
|
||||
}
|
||||
if (engine == SoftmaxParameter_Engine_CAFFE) {
|
||||
return new SoftmaxLayer<Dtype>(param);
|
||||
|
|
|
@ -388,6 +388,7 @@ message ConvolutionParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 15 [default = DEFAULT];
|
||||
}
|
||||
|
@ -579,6 +580,7 @@ message PoolingParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 11 [default = DEFAULT];
|
||||
}
|
||||
|
@ -602,6 +604,7 @@ message ReLUParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 2 [default = DEFAULT];
|
||||
}
|
||||
|
@ -611,6 +614,7 @@ message SigmoidParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
}
|
||||
|
@ -630,6 +634,7 @@ message SoftmaxParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
}
|
||||
|
@ -639,6 +644,7 @@ message TanHParameter {
|
|||
enum Engine {
|
||||
DEFAULT = 0;
|
||||
CAFFE = 1;
|
||||
CUDNN = 2;
|
||||
}
|
||||
optional Engine engine = 1 [default = DEFAULT];
|
||||
}
|
||||
|
|
Loading…
Link in new issue