cudnn: enabled build on Linux with cuDNN.
This commit is contained in:
Родитель
895c10a197
Коммит
3023f11e6e
11
Makefile
11
Makefile
|
@ -23,6 +23,8 @@
|
|||
# If not specified, GPU will not be enabled
|
||||
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
|
||||
# defaults to /usr/local/cub-1.4.1
|
||||
# CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists
|
||||
# If not specified, CNTK will be built without cuDNN.
|
||||
# KALDI_PATH= Path to Kaldi
|
||||
# If not specified, Kaldi plugins will not be built
|
||||
# OPENCV_PATH= path to OpenCV 3.0.0 installation, so $(OPENCV_PATH) exists
|
||||
|
@ -102,6 +104,13 @@ ifdef CUDA_PATH
|
|||
LIBPATH += $(CUDA_PATH)/lib64
|
||||
LIBS += -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml
|
||||
|
||||
# Set up cuDNN if needed
|
||||
ifdef CUDNN_PATH
|
||||
INCLUDEPATH += $(CUDNN_PATH)/cuda/include
|
||||
LIBPATH += $(CUDNN_PATH)/cuda/lib64
|
||||
LIBS += -lcudnn
|
||||
CPPFLAGS +=-DUSE_CUDNN
|
||||
endif
|
||||
else
|
||||
DEVICE = cpu
|
||||
|
||||
|
@ -218,6 +227,7 @@ MATH_SRC =\
|
|||
Math/Math/QuantizedMatrix.cpp \
|
||||
Math/Math/Matrix.cpp \
|
||||
Math/Math/CUDAPageLockedMemAllocator.cpp \
|
||||
Math/Math/ConvolutionEngine.cpp \
|
||||
|
||||
ifdef CUDA_PATH
|
||||
MATH_SRC +=\
|
||||
|
@ -225,6 +235,7 @@ MATH_SRC +=\
|
|||
Math/Math/GPUSparseMatrix.cu \
|
||||
Math/Math/GPUWatcher.cu \
|
||||
Math/Math/MatrixQuantizerGPU.cu \
|
||||
Math/Math/CuDnnConvolutionEngine.cpp \
|
||||
|
||||
else
|
||||
MATH_SRC +=\
|
||||
|
|
|
@ -10,63 +10,16 @@
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
template<class ElemType>
|
||||
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
||||
{
|
||||
public:
|
||||
DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
||||
: ConvolutionEngineFactory<ElemType>(deviceId)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
|
||||
{
|
||||
return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
|
||||
}
|
||||
|
||||
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
|
||||
{
|
||||
return std::make_unique<Filter>(w, h, c, k);
|
||||
}
|
||||
|
||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
|
||||
size_t wStride, size_t hStride, bool padding) override
|
||||
{
|
||||
return std::make_unique<ConvDesc>(wStride, hStride, padding);
|
||||
}
|
||||
|
||||
PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
|
||||
{
|
||||
return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
|
||||
}
|
||||
|
||||
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
|
||||
{
|
||||
return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
|
||||
}
|
||||
|
||||
PoolEnginePtr CreatePoolEngine() override
|
||||
{
|
||||
return std::make_unique<DefaultPoolingEngine<ElemType>>();
|
||||
}
|
||||
};
|
||||
|
||||
template<class ElemType>
|
||||
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
// REVIEW alexeyk: make cuDNN default when running on GPU and compiled with cuDNN, add config parameter to enable runtime switch between implementations.
|
||||
if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
|
||||
return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
|
||||
return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
|
||||
}
|
||||
|
||||
template class ConvolutionEngineFactory<float>;
|
||||
template class ConvolutionEngineFactory<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class DefaultConvolutionEngine : public ConvolutionEngine<ElemType>
|
||||
{
|
||||
public:
|
||||
using Base = ConvolutionEngine<ElemType>;
|
||||
using typename Base::Mat;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::Filter;
|
||||
using typename Base::ConvDesc;
|
||||
|
||||
public:
|
||||
DefaultConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
||||
: m_tempMatrix(deviceId), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
|
||||
|
@ -294,6 +247,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class DefaultPoolingEngine : public PoolingEngine<ElemType>
|
||||
{
|
||||
public:
|
||||
using Base = PoolingEngine<ElemType>;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::PoolDesc;
|
||||
using typename Base::Mat;
|
||||
|
||||
public:
|
||||
void Forward(const Tensor4D& inT, const Mat& in, const PoolDesc& poolDesc, const Tensor4D& outT, Mat& out) override
|
||||
{
|
||||
|
@ -349,4 +308,75 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template class PoolingEngine<float>;
|
||||
template class PoolingEngine<double>;
|
||||
|
||||
template<class ElemType>
|
||||
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
||||
{
|
||||
public:
|
||||
using Base = ConvolutionEngineFactory<ElemType>;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::Tensor4DPtr;
|
||||
using typename Base::Filter;
|
||||
using typename Base::FilterPtr;
|
||||
using typename Base::ConvDesc;
|
||||
using typename Base::ConvDescPtr;
|
||||
using typename Base::PoolDesc;
|
||||
using typename Base::PoolDescPtr;
|
||||
|
||||
using typename Base::ConvEnginePtr;
|
||||
using typename Base::PoolEnginePtr;
|
||||
|
||||
using Base::m_deviceId;
|
||||
|
||||
public:
|
||||
DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
||||
: ConvolutionEngineFactory<ElemType>(deviceId)
|
||||
{
|
||||
}
|
||||
|
||||
public:
|
||||
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
|
||||
{
|
||||
return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
|
||||
}
|
||||
|
||||
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
|
||||
{
|
||||
return std::make_unique<Filter>(w, h, c, k);
|
||||
}
|
||||
|
||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
|
||||
size_t wStride, size_t hStride, bool padding) override
|
||||
{
|
||||
return std::make_unique<ConvDesc>(wStride, hStride, padding);
|
||||
}
|
||||
|
||||
PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
|
||||
{
|
||||
return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
|
||||
}
|
||||
|
||||
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
|
||||
{
|
||||
return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
|
||||
}
|
||||
|
||||
PoolEnginePtr CreatePoolEngine() override
|
||||
{
|
||||
return std::make_unique<DefaultPoolingEngine<ElemType>>();
|
||||
}
|
||||
};
|
||||
|
||||
template<class ElemType>
|
||||
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
// REVIEW alexeyk: make cuDNN default when running on GPU and compiled with cuDNN, add config parameter to enable runtime switch between implementations.
|
||||
//if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
|
||||
return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
|
||||
//return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
|
||||
}
|
||||
|
||||
template class ConvolutionEngineFactory<float>;
|
||||
template class ConvolutionEngineFactory<double>;
|
||||
|
||||
}}}
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#ifdef USE_CUDNN
|
||||
#include <cudnn.h>
|
||||
|
||||
template<> static const char* CudaErrString(cudnnStatus_t x)
|
||||
template<> const char* CudaErrString(cudnnStatus_t x)
|
||||
{
|
||||
return cudnnGetErrorString(x);
|
||||
}
|
||||
|
@ -202,12 +202,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
class CuDnnConvolutionEngine : public ConvolutionEngine<ElemType>
|
||||
{
|
||||
public:
|
||||
using Tensor4D = ConvolutionTensor4D;
|
||||
using Tensor4DPtr = std::unique_ptr<Tensor4D>;
|
||||
using Filter = ConvolutionFilter;
|
||||
using FilterPtr = std::unique_ptr<ConvolutionFilter>;
|
||||
using ConvDesc = ConvolutionDescriptor;
|
||||
using ConvDescPtr = std::unique_ptr<ConvolutionDescriptor>;
|
||||
using Base = ConvolutionEngine<ElemType>;
|
||||
using typename Base::Mat;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::Filter;
|
||||
using typename Base::ConvDesc;
|
||||
|
||||
CuDnnConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
||||
: m_maxTempMemSizeInSamples(maxTempMemSizeInSamples), m_cudnn(nullptr), m_curMBSize(0), m_tempC(deviceId)
|
||||
|
@ -495,6 +494,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class CuDnnPoolingEngine : public PoolingEngine<ElemType>
|
||||
{
|
||||
public:
|
||||
using Base = PoolingEngine<ElemType>;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::PoolDesc;
|
||||
using typename Base::Mat;
|
||||
|
||||
public:
|
||||
CuDnnPoolingEngine()
|
||||
: m_cudnn(nullptr)
|
||||
|
@ -546,7 +551,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
typename CuDnnConvolutionEngineFactory<ElemType>::Tensor4DPtr CuDnnConvolutionEngineFactory<ElemType>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
||||
{
|
||||
static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
|
||||
// REVIEW alexeyk: assert fires in GCC but not in VC++.
|
||||
//static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
|
||||
}
|
||||
template<>
|
||||
typename CuDnnConvolutionEngineFactory<float>::Tensor4DPtr CuDnnConvolutionEngineFactory<float>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
||||
|
@ -562,7 +568,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
typename CuDnnConvolutionEngineFactory<ElemType>::FilterPtr CuDnnConvolutionEngineFactory<ElemType>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
||||
{
|
||||
static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
|
||||
// REVIEW alexeyk: assert fires in GCC but not in VC++.
|
||||
//static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
|
||||
}
|
||||
template<>
|
||||
typename CuDnnConvolutionEngineFactory<float>::FilterPtr CuDnnConvolutionEngineFactory<float>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
||||
|
@ -586,7 +593,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template<class ElemType>
|
||||
typename CuDnnConvolutionEngineFactory<ElemType>::PoolDescPtr CuDnnConvolutionEngineFactory<ElemType>::CreatePoolDescriptor(
|
||||
PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
|
||||
typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
|
||||
{
|
||||
return std::make_unique<CuDnnPoolingDescriptor>(kind, w, h, wStride, hStride, wPad, hPad);
|
||||
}
|
||||
|
|
|
@ -13,6 +13,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
template<class ElemType>
|
||||
class CuDnnConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
||||
{
|
||||
public:
|
||||
using Base = ConvolutionEngineFactory<ElemType>;
|
||||
using typename Base::Tensor4D;
|
||||
using typename Base::Tensor4DPtr;
|
||||
using typename Base::Filter;
|
||||
using typename Base::FilterPtr;
|
||||
using typename Base::ConvDesc;
|
||||
using typename Base::ConvDescPtr;
|
||||
using typename Base::PoolDesc;
|
||||
using typename Base::PoolDescPtr;
|
||||
|
||||
using typename Base::ConvEnginePtr;
|
||||
using typename Base::PoolEnginePtr;
|
||||
|
||||
using Base::m_deviceId;
|
||||
|
||||
public:
|
||||
CuDnnConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
||||
: ConvolutionEngineFactory<ElemType>(deviceId)
|
||||
|
@ -22,9 +38,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
public:
|
||||
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override;
|
||||
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override;
|
||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
|
||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
|
||||
size_t wStride, size_t hStride, bool padding) override;
|
||||
PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;
|
||||
PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;
|
||||
|
||||
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override;
|
||||
PoolEnginePtr CreatePoolEngine() override;
|
||||
|
|
|
@ -32,6 +32,10 @@ have_cub=no
|
|||
cub_path=
|
||||
cub_check=cub/cub.cuh
|
||||
|
||||
have_cudnn=no
|
||||
cudnn_path=
|
||||
cudnn_check=cuda/include/cudnn.h
|
||||
|
||||
have_opencv=no
|
||||
opencv_path=
|
||||
opencv_check=include/opencv2/opencv.hpp
|
||||
|
@ -49,6 +53,7 @@ default_cudas="cuda-7.5 cuda-7.0 cuda-6.5"
|
|||
default_kaldis="kaldi-trunk"
|
||||
default_gdks=". gdk/usr"
|
||||
default_cubs="cub-1.4.1"
|
||||
default_cudnns="cudnn-4.0"
|
||||
default_opencvs="opencv-3.0.0"
|
||||
|
||||
function default_paths ()
|
||||
|
@ -115,6 +120,11 @@ function find_cub ()
|
|||
find_dir "$default_cubs" "$cub_check"
|
||||
}
|
||||
|
||||
function find_cudnn ()
|
||||
{
|
||||
find_dir "$default_cudnns" "$cudnn_check"
|
||||
}
|
||||
|
||||
function find_opencv ()
|
||||
{
|
||||
find_dir "$default_opencvs" "$opencv_check"
|
||||
|
@ -162,6 +172,7 @@ function show_help ()
|
|||
echo " --with-cuda[=directory] $(show_default $(find_cuda))"
|
||||
echo " --with-cub[=directory] $(show_default $(find_cub))"
|
||||
echo " --with-gdk[=directory] $(show_default $(find_gdk))"
|
||||
echo " --with-cudnn[=directory] $(show_default $(find_cudnn))"
|
||||
echo " --with-acml[=directory] $(show_default $(find_acml))"
|
||||
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
|
||||
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
|
||||
|
@ -278,6 +289,27 @@ do
|
|||
fi
|
||||
fi
|
||||
;;
|
||||
--with-cudnn*)
|
||||
have_cudnn=yes
|
||||
if test x$optarg = x
|
||||
then
|
||||
cudnn_path=$(find_cudnn)
|
||||
if test x$cudnn_path = x
|
||||
then
|
||||
echo "Cannot find NVIDIA cuDNN directory."
|
||||
echo "Please specify a value for --with-cudnn"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
if test $(check_dir $optarg $cudnn_check) = yes
|
||||
then
|
||||
cudnn_path=$optarg
|
||||
else
|
||||
echo "Invalid cuDNN directory $optarg"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
;;
|
||||
--with-acml*)
|
||||
have_acml=yes
|
||||
mathlib=acml
|
||||
|
@ -448,6 +480,18 @@ then
|
|||
echo Found CUB at $cub_path
|
||||
fi
|
||||
fi
|
||||
|
||||
if test $enable_cuda = yes && test x$cudnn_path = x
|
||||
then
|
||||
cudnn_path=$(find_cudnn)
|
||||
if test x$cudnn_path = x ; then
|
||||
echo Cannot locate NVIDIA cuDNN directory
|
||||
echo CNTK will use default convolution engine.
|
||||
else
|
||||
echo Found cuDNN at $cudnn_path
|
||||
fi
|
||||
fi
|
||||
|
||||
config=$build_top/Config.make
|
||||
echo Generating $config
|
||||
echo "#Configuration file for cntk" > $config
|
||||
|
@ -465,6 +509,7 @@ if test $enable_cuda = yes ; then
|
|||
echo CUDA_PATH=$cuda_path >> $config
|
||||
echo GDK_PATH=$gdk_path >> $config
|
||||
echo CUB_PATH=$cub_path >> $config
|
||||
echo CUDNN_PATH=$cudnn_path >> $config
|
||||
fi
|
||||
if test x$kaldi_path != x ; then
|
||||
echo KALDI_PATH=$kaldi_path >> $config
|
||||
|
|
Загрузка…
Ссылка в новой задаче