This commit is contained in:
Evan Shelhamer 2014-09-01 14:51:24 -07:00
Родитель 3cf3df829e
Коммит 77d91242cd
6 изменённых файлов: 130 добавлений и 1 удалений

Просмотреть файл

@ -253,10 +253,17 @@ endif
# Debugging
ifeq ($(DEBUG), 1)
COMMON_FLAGS += -DDEBUG -g -O0
NVCCFLAGS += -G
else
COMMON_FLAGS += -DNDEBUG -O2
endif
# cuDNN acceleration configuration.
ifeq ($(USE_CUDNN), 1)
LIBRARIES += cudnn
COMMON_FLAGS += -DUSE_CUDNN
endif
# CPU-only configuration
ifeq ($(CPU_ONLY), 1)
OBJS := $(PROTO_OBJS) $(CXX_OBJS)
@ -299,7 +306,7 @@ LIBRARY_DIRS += $(BLAS_LIB)
# Complete build flags.
COMMON_FLAGS += $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
CXXFLAGS += -pthread -fPIC $(COMMON_FLAGS) $(WARNINGS)
NVCCFLAGS := -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
NVCCFLAGS += -ccbin=$(CXX) -Xcompiler -fPIC $(COMMON_FLAGS)
# mex may invoke an older gcc that is too liberal with -Wuninitalized
MATLAB_CXXFLAGS := $(CXXFLAGS) -Wno-uninitialized
LINKFLAGS += -fPIC $(COMMON_FLAGS) $(WARNINGS)

Просмотреть файл

@ -1,6 +1,9 @@
## Refer to http://caffe.berkeleyvision.org/installation.html
# Contributions simplifying and improving our build system are welcome!
# cuDNN acceleration switch (uncomment to build with cuDNN).
# USE_CUDNN := 1
# CPU-only switch (uncomment to build without GPU support).
# CPU_ONLY := 1

Просмотреть файл

@ -0,0 +1,92 @@
#ifndef CAFFE_UTIL_CUDNN_H_
#define CAFFE_UTIL_CUDNN_H_
#ifdef USE_CUDNN
#include <cudnn.h>
#include "caffe/proto/caffe.pb.h"
#define CUDNN_CHECK(condition) \
do { \
cudnnStatus_t status = condition; \
CHECK_EQ(status, CUDNN_STATUS_SUCCESS) << " cuDNN error."; \
} while (0)
namespace caffe {
// TODO(cudnn): check existence, add to CUDN_CHECK
// const char* cudnnGetErrorString(curandStatus_t error);
//
namespace cudnn {
template <typename Dtype> class dataType;
template<> class dataType<float> {
public:
static const cudnnDataType_t type = CUDNN_DATA_FLOAT;
};
template<> class dataType<double> {
public:
static const cudnnDataType_t type = CUDNN_DATA_DOUBLE;
};
template <typename Dtype>
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
int n, int c, int h, int w,
int stride_n, int stride_c, int stride_h, int stride_w) {
CUDNN_CHECK(cudnnCreateTensor4dDescriptor(desc));
CUDNN_CHECK(cudnnSetTensor4dDescriptorEx(*desc, dataType<Dtype>::type,
n, c, h, w, stride_n, stride_c, stride_h, stride_w));
}
template <typename Dtype>
inline void createTensor4dDesc(cudnnTensor4dDescriptor_t* desc,
int n, int c, int h, int w) {
const int stride_w = 1;
const int stride_h = w * stride_w;
const int stride_c = h * stride_h;
const int stride_n = c * stride_c;
createTensor4dDesc<Dtype>(desc, n, c, h, w,
stride_n, stride_c, stride_h, stride_w);
}
template <typename Dtype>
inline void createFilterDesc(cudnnFilterDescriptor_t* desc,
int n, int c, int h, int w) {
CUDNN_CHECK(cudnnCreateFilterDescriptor(desc));
CUDNN_CHECK(cudnnSetFilterDescriptor(*desc, dataType<Dtype>::type,
n, c, h, w));
}
template <typename Dtype>
inline void createConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
cudnnTensor4dDescriptor_t bottom, cudnnFilterDescriptor_t filter,
int pad_h, int pad_w, int stride_h, int stride_w) {
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(conv));
CUDNN_CHECK(cudnnSetConvolutionDescriptor(*conv, bottom, filter,
pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
}
template <typename Dtype>
inline void createPoolingDesc(cudnnPoolingDescriptor_t* conv,
PoolingParameter_PoolMethod poolmethod, cudnnPoolingMode_t* mode,
int h, int w, int stride_h, int stride_w) {
switch (poolmethod) {
case PoolingParameter_PoolMethod_MAX:
*mode = CUDNN_POOLING_MAX;
break;
case PoolingParameter_PoolMethod_AVE:
*mode = CUDNN_POOLING_AVERAGE;
break;
default:
LOG(FATAL) << "Unknown pooling method.";
}
CUDNN_CHECK(cudnnCreatePoolingDescriptor(conv));
CUDNN_CHECK(cudnnSetPoolingDescriptor(*conv, *mode, h, w,
stride_h, stride_w));
}
} // namespace cudnn
} // namespace caffe
#endif // USE_CUDNN
#endif // CAFFE_UTIL_CUDNN_H_

Просмотреть файл

@ -36,6 +36,9 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
#include <cuda_runtime.h>
#include <curand.h>
#include <driver_types.h> // cuda driver types
#ifdef USE_CUDNN // cuDNN acceleration library.
#include "caffe/util/cudnn.hpp"
#endif
//
// CUDA macros

Просмотреть файл

@ -16,6 +16,9 @@ ConvolutionLayer<Dtype>* GetConvolutionLayer(const string& name,
ConvolutionParameter_Engine engine = param.convolution_param().engine();
if (engine == ConvolutionParameter_Engine_DEFAULT) {
engine = ConvolutionParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = ConvolutionParameter_Engine_CUDNN;
#endif
}
if (engine == ConvolutionParameter_Engine_CAFFE) {
return new ConvolutionLayer<Dtype>(param);
@ -36,6 +39,9 @@ PoolingLayer<Dtype>* GetPoolingLayer(const string& name,
PoolingParameter_Engine engine = param.pooling_param().engine();
if (engine == PoolingParameter_Engine_DEFAULT) {
engine = PoolingParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = PoolingParameter_Engine_CUDNN;
#endif
}
if (engine == PoolingParameter_Engine_CAFFE) {
return new PoolingLayer<Dtype>(param);
@ -56,6 +62,9 @@ ReLULayer<Dtype>* GetReLULayer(const string& name,
ReLUParameter_Engine engine = param.relu_param().engine();
if (engine == ReLUParameter_Engine_DEFAULT) {
engine = ReLUParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = ReLUParameter_Engine_CUDNN;
#endif
}
if (engine == ReLUParameter_Engine_CAFFE) {
return new ReLULayer<Dtype>(param);
@ -76,6 +85,9 @@ SigmoidLayer<Dtype>* GetSigmoidLayer(const string& name,
SigmoidParameter_Engine engine = param.sigmoid_param().engine();
if (engine == SigmoidParameter_Engine_DEFAULT) {
engine = SigmoidParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = SigmoidParameter_Engine_CUDNN;
#endif
}
if (engine == SigmoidParameter_Engine_CAFFE) {
return new SigmoidLayer<Dtype>(param);
@ -96,6 +108,9 @@ TanHLayer<Dtype>* GetTanHLayer(const string& name,
TanHParameter_Engine engine = param.tanh_param().engine();
if (engine == TanHParameter_Engine_DEFAULT) {
engine = TanHParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = TanHParameter_Engine_CUDNN;
#endif
}
if (engine == TanHParameter_Engine_CAFFE) {
return new TanHLayer<Dtype>(param);
@ -116,6 +131,9 @@ SoftmaxLayer<Dtype>* GetSoftmaxLayer(const string& name,
SoftmaxParameter_Engine engine = param.softmax_param().engine();
if (engine == SoftmaxParameter_Engine_DEFAULT) {
engine = SoftmaxParameter_Engine_CAFFE;
#ifdef USE_CUDNN
engine = SoftmaxParameter_Engine_CUDNN;
#endif
}
if (engine == SoftmaxParameter_Engine_CAFFE) {
return new SoftmaxLayer<Dtype>(param);

Просмотреть файл

@ -388,6 +388,7 @@ message ConvolutionParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 15 [default = DEFAULT];
}
@ -579,6 +580,7 @@ message PoolingParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 11 [default = DEFAULT];
}
@ -602,6 +604,7 @@ message ReLUParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 2 [default = DEFAULT];
}
@ -611,6 +614,7 @@ message SigmoidParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
}
@ -630,6 +634,7 @@ message SoftmaxParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
}
@ -639,6 +644,7 @@ message TanHParameter {
enum Engine {
DEFAULT = 0;
CAFFE = 1;
CUDNN = 2;
}
optional Engine engine = 1 [default = DEFAULT];
}