cudnn: enabled build on Linux with cuDNN.
This commit is contained in:
Родитель
895c10a197
Коммит
3023f11e6e
11
Makefile
11
Makefile
|
@ -23,6 +23,8 @@
|
||||||
# If not specified, GPU will not be enabled
|
# If not specified, GPU will not be enabled
|
||||||
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
|
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
|
||||||
# defaults to /usr/local/cub-1.4.1
|
# defaults to /usr/local/cub-1.4.1
|
||||||
|
# CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists
|
||||||
|
# If not specified, CNTK will be built without cuDNN.
|
||||||
# KALDI_PATH= Path to Kaldi
|
# KALDI_PATH= Path to Kaldi
|
||||||
# If not specified, Kaldi plugins will not be built
|
# If not specified, Kaldi plugins will not be built
|
||||||
# OPENCV_PATH= path to OpenCV 3.0.0 installation, so $(OPENCV_PATH) exists
|
# OPENCV_PATH= path to OpenCV 3.0.0 installation, so $(OPENCV_PATH) exists
|
||||||
|
@ -102,6 +104,13 @@ ifdef CUDA_PATH
|
||||||
LIBPATH += $(CUDA_PATH)/lib64
|
LIBPATH += $(CUDA_PATH)/lib64
|
||||||
LIBS += -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml
|
LIBS += -lcublas -lcudart -lcuda -lcurand -lcusparse -lnvidia-ml
|
||||||
|
|
||||||
|
# Optional cuDNN support: only when CUDNN_PATH is set do we define USE_CUDNN,
# link against libcudnn and add the cuDNN header/library directories.
ifdef CUDNN_PATH
  CPPFLAGS += -DUSE_CUDNN
  LIBS += -lcudnn
  INCLUDEPATH += $(CUDNN_PATH)/cuda/include
  LIBPATH += $(CUDNN_PATH)/cuda/lib64
endif
|
||||||
else
|
else
|
||||||
DEVICE = cpu
|
DEVICE = cpu
|
||||||
|
|
||||||
|
@ -218,6 +227,7 @@ MATH_SRC =\
|
||||||
Math/Math/QuantizedMatrix.cpp \
|
Math/Math/QuantizedMatrix.cpp \
|
||||||
Math/Math/Matrix.cpp \
|
Math/Math/Matrix.cpp \
|
||||||
Math/Math/CUDAPageLockedMemAllocator.cpp \
|
Math/Math/CUDAPageLockedMemAllocator.cpp \
|
||||||
|
Math/Math/ConvolutionEngine.cpp \
|
||||||
|
|
||||||
ifdef CUDA_PATH
|
ifdef CUDA_PATH
|
||||||
MATH_SRC +=\
|
MATH_SRC +=\
|
||||||
|
@ -225,6 +235,7 @@ MATH_SRC +=\
|
||||||
Math/Math/GPUSparseMatrix.cu \
|
Math/Math/GPUSparseMatrix.cu \
|
||||||
Math/Math/GPUWatcher.cu \
|
Math/Math/GPUWatcher.cu \
|
||||||
Math/Math/MatrixQuantizerGPU.cu \
|
Math/Math/MatrixQuantizerGPU.cu \
|
||||||
|
Math/Math/CuDnnConvolutionEngine.cpp \
|
||||||
|
|
||||||
else
|
else
|
||||||
MATH_SRC +=\
|
MATH_SRC +=\
|
||||||
|
|
|
@ -10,63 +10,16 @@
|
||||||
|
|
||||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
template<class ElemType>
|
|
||||||
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
|
||||||
: ConvolutionEngineFactory<ElemType>(deviceId)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
|
|
||||||
{
|
|
||||||
return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
|
|
||||||
}
|
|
||||||
|
|
||||||
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
|
|
||||||
{
|
|
||||||
return std::make_unique<Filter>(w, h, c, k);
|
|
||||||
}
|
|
||||||
|
|
||||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
|
|
||||||
size_t wStride, size_t hStride, bool padding) override
|
|
||||||
{
|
|
||||||
return std::make_unique<ConvDesc>(wStride, hStride, padding);
|
|
||||||
}
|
|
||||||
|
|
||||||
PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
|
|
||||||
{
|
|
||||||
return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
|
|
||||||
}
|
|
||||||
|
|
||||||
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
|
|
||||||
{
|
|
||||||
return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
|
|
||||||
}
|
|
||||||
|
|
||||||
PoolEnginePtr CreatePoolEngine() override
|
|
||||||
{
|
|
||||||
return std::make_unique<DefaultPoolingEngine<ElemType>>();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<class ElemType>
|
|
||||||
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
|
|
||||||
{
|
|
||||||
// REVIEW alexeyk: make cuDNN default when running on GPU and compiled with cuDNN, add config parameter to enable runtime switch between implementations.
|
|
||||||
if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
|
|
||||||
return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
|
|
||||||
return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
|
|
||||||
}
|
|
||||||
|
|
||||||
template class ConvolutionEngineFactory<float>;
|
|
||||||
template class ConvolutionEngineFactory<double>;
|
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
class DefaultConvolutionEngine : public ConvolutionEngine<ElemType>
|
class DefaultConvolutionEngine : public ConvolutionEngine<ElemType>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
using Base = ConvolutionEngine<ElemType>;
|
||||||
|
using typename Base::Mat;
|
||||||
|
using typename Base::Tensor4D;
|
||||||
|
using typename Base::Filter;
|
||||||
|
using typename Base::ConvDesc;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DefaultConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
DefaultConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
||||||
: m_tempMatrix(deviceId), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
|
: m_tempMatrix(deviceId), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
|
||||||
|
@ -294,6 +247,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
class DefaultPoolingEngine : public PoolingEngine<ElemType>
|
class DefaultPoolingEngine : public PoolingEngine<ElemType>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
using Base = PoolingEngine<ElemType>;
|
||||||
|
using typename Base::Tensor4D;
|
||||||
|
using typename Base::PoolDesc;
|
||||||
|
using typename Base::Mat;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void Forward(const Tensor4D& inT, const Mat& in, const PoolDesc& poolDesc, const Tensor4D& outT, Mat& out) override
|
void Forward(const Tensor4D& inT, const Mat& in, const PoolDesc& poolDesc, const Tensor4D& outT, Mat& out) override
|
||||||
{
|
{
|
||||||
|
@ -349,4 +308,75 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
template class PoolingEngine<float>;
|
template class PoolingEngine<float>;
|
||||||
template class PoolingEngine<double>;
|
template class PoolingEngine<double>;
|
||||||
|
|
||||||
|
template<class ElemType>
|
||||||
|
class DefaultConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using Base = ConvolutionEngineFactory<ElemType>;
|
||||||
|
using typename Base::Tensor4D;
|
||||||
|
using typename Base::Tensor4DPtr;
|
||||||
|
using typename Base::Filter;
|
||||||
|
using typename Base::FilterPtr;
|
||||||
|
using typename Base::ConvDesc;
|
||||||
|
using typename Base::ConvDescPtr;
|
||||||
|
using typename Base::PoolDesc;
|
||||||
|
using typename Base::PoolDescPtr;
|
||||||
|
|
||||||
|
using typename Base::ConvEnginePtr;
|
||||||
|
using typename Base::PoolEnginePtr;
|
||||||
|
|
||||||
|
using Base::m_deviceId;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DefaultConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
||||||
|
: ConvolutionEngineFactory<ElemType>(deviceId)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override
|
||||||
|
{
|
||||||
|
return std::make_unique<ConvolutionTensor4D>(w, h, c, n);
|
||||||
|
}
|
||||||
|
|
||||||
|
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override
|
||||||
|
{
|
||||||
|
return std::make_unique<Filter>(w, h, c, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
ConvDescPtr CreateConvDescriptor(const Tensor4D& /*inT*/, const Filter& /*filterT*/,
|
||||||
|
size_t wStride, size_t hStride, bool padding) override
|
||||||
|
{
|
||||||
|
return std::make_unique<ConvDesc>(wStride, hStride, padding);
|
||||||
|
}
|
||||||
|
|
||||||
|
PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override
|
||||||
|
{
|
||||||
|
return std::make_unique<PoolDesc>(kind, w, h, wStride, hStride, wPad, hPad);
|
||||||
|
}
|
||||||
|
|
||||||
|
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override
|
||||||
|
{
|
||||||
|
return std::make_unique<DefaultConvolutionEngine<ElemType>>(m_deviceId, maxTempMemSizeInSamples);
|
||||||
|
}
|
||||||
|
|
||||||
|
PoolEnginePtr CreatePoolEngine() override
|
||||||
|
{
|
||||||
|
return std::make_unique<DefaultPoolingEngine<ElemType>>();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template<class ElemType>
|
||||||
|
std::unique_ptr<ConvolutionEngineFactory<ElemType>> ConvolutionEngineFactory<ElemType>::Create(DEVICEID_TYPE deviceId)
|
||||||
|
{
|
||||||
|
// REVIEW alexeyk: make cuDNN default when running on GPU and compiled with cuDNN, add config parameter to enable runtime switch between implementations.
|
||||||
|
//if (deviceId >= 0 && CuDnnConvolutionEngineFactory<ElemType>::IsSupported())
|
||||||
|
return std::make_unique<CuDnnConvolutionEngineFactory<ElemType>>(deviceId);
|
||||||
|
//return std::make_unique<DefaultConvolutionEngineFactory<ElemType>>(deviceId);
|
||||||
|
}
|
||||||
|
|
||||||
|
template class ConvolutionEngineFactory<float>;
|
||||||
|
template class ConvolutionEngineFactory<double>;
|
||||||
|
|
||||||
}}}
|
}}}
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#ifdef USE_CUDNN
|
#ifdef USE_CUDNN
|
||||||
#include <cudnn.h>
|
#include <cudnn.h>
|
||||||
|
|
||||||
template<> static const char* CudaErrString(cudnnStatus_t x)
|
template<> const char* CudaErrString(cudnnStatus_t x)
|
||||||
{
|
{
|
||||||
return cudnnGetErrorString(x);
|
return cudnnGetErrorString(x);
|
||||||
}
|
}
|
||||||
|
@ -202,12 +202,11 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
class CuDnnConvolutionEngine : public ConvolutionEngine<ElemType>
|
class CuDnnConvolutionEngine : public ConvolutionEngine<ElemType>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
using Tensor4D = ConvolutionTensor4D;
|
using Base = ConvolutionEngine<ElemType>;
|
||||||
using Tensor4DPtr = std::unique_ptr<Tensor4D>;
|
using typename Base::Mat;
|
||||||
using Filter = ConvolutionFilter;
|
using typename Base::Tensor4D;
|
||||||
using FilterPtr = std::unique_ptr<ConvolutionFilter>;
|
using typename Base::Filter;
|
||||||
using ConvDesc = ConvolutionDescriptor;
|
using typename Base::ConvDesc;
|
||||||
using ConvDescPtr = std::unique_ptr<ConvolutionDescriptor>;
|
|
||||||
|
|
||||||
CuDnnConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
CuDnnConvolutionEngine(DEVICEID_TYPE deviceId, size_t maxTempMemSizeInSamples)
|
||||||
: m_maxTempMemSizeInSamples(maxTempMemSizeInSamples), m_cudnn(nullptr), m_curMBSize(0), m_tempC(deviceId)
|
: m_maxTempMemSizeInSamples(maxTempMemSizeInSamples), m_cudnn(nullptr), m_curMBSize(0), m_tempC(deviceId)
|
||||||
|
@ -495,6 +494,12 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
class CuDnnPoolingEngine : public PoolingEngine<ElemType>
|
class CuDnnPoolingEngine : public PoolingEngine<ElemType>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
using Base = PoolingEngine<ElemType>;
|
||||||
|
using typename Base::Tensor4D;
|
||||||
|
using typename Base::PoolDesc;
|
||||||
|
using typename Base::Mat;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CuDnnPoolingEngine()
|
CuDnnPoolingEngine()
|
||||||
: m_cudnn(nullptr)
|
: m_cudnn(nullptr)
|
||||||
|
@ -546,7 +551,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
typename CuDnnConvolutionEngineFactory<ElemType>::Tensor4DPtr CuDnnConvolutionEngineFactory<ElemType>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
typename CuDnnConvolutionEngineFactory<ElemType>::Tensor4DPtr CuDnnConvolutionEngineFactory<ElemType>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
||||||
{
|
{
|
||||||
static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
|
// REVIEW alexeyk: assert fires in GCC but not in VC++.
|
||||||
|
//static_assert(false, "cuDNN engine currently supports only single and double precision tensors.");
|
||||||
}
|
}
|
||||||
template<>
|
template<>
|
||||||
typename CuDnnConvolutionEngineFactory<float>::Tensor4DPtr CuDnnConvolutionEngineFactory<float>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
typename CuDnnConvolutionEngineFactory<float>::Tensor4DPtr CuDnnConvolutionEngineFactory<float>::CreateTensor(size_t w, size_t h, size_t c, size_t n)
|
||||||
|
@ -562,7 +568,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
typename CuDnnConvolutionEngineFactory<ElemType>::FilterPtr CuDnnConvolutionEngineFactory<ElemType>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
typename CuDnnConvolutionEngineFactory<ElemType>::FilterPtr CuDnnConvolutionEngineFactory<ElemType>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
||||||
{
|
{
|
||||||
static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
|
// REVIEW alexeyk: assert fires in GCC but not in VC++.
|
||||||
|
//static_assert(false, "cuDNN engine currently supports only single and double precision filters.");
|
||||||
}
|
}
|
||||||
template<>
|
template<>
|
||||||
typename CuDnnConvolutionEngineFactory<float>::FilterPtr CuDnnConvolutionEngineFactory<float>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
typename CuDnnConvolutionEngineFactory<float>::FilterPtr CuDnnConvolutionEngineFactory<float>::CreateFilter(size_t w, size_t h, size_t c, size_t k)
|
||||||
|
@ -586,7 +593,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
typename CuDnnConvolutionEngineFactory<ElemType>::PoolDescPtr CuDnnConvolutionEngineFactory<ElemType>::CreatePoolDescriptor(
|
typename CuDnnConvolutionEngineFactory<ElemType>::PoolDescPtr CuDnnConvolutionEngineFactory<ElemType>::CreatePoolDescriptor(
|
||||||
PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
|
typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad)
|
||||||
{
|
{
|
||||||
return std::make_unique<CuDnnPoolingDescriptor>(kind, w, h, wStride, hStride, wPad, hPad);
|
return std::make_unique<CuDnnPoolingDescriptor>(kind, w, h, wStride, hStride, wPad, hPad);
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,22 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
template<class ElemType>
|
template<class ElemType>
|
||||||
class CuDnnConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
class CuDnnConvolutionEngineFactory : public ConvolutionEngineFactory<ElemType>
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
using Base = ConvolutionEngineFactory<ElemType>;
|
||||||
|
using typename Base::Tensor4D;
|
||||||
|
using typename Base::Tensor4DPtr;
|
||||||
|
using typename Base::Filter;
|
||||||
|
using typename Base::FilterPtr;
|
||||||
|
using typename Base::ConvDesc;
|
||||||
|
using typename Base::ConvDescPtr;
|
||||||
|
using typename Base::PoolDesc;
|
||||||
|
using typename Base::PoolDescPtr;
|
||||||
|
|
||||||
|
using typename Base::ConvEnginePtr;
|
||||||
|
using typename Base::PoolEnginePtr;
|
||||||
|
|
||||||
|
using Base::m_deviceId;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
CuDnnConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
CuDnnConvolutionEngineFactory(DEVICEID_TYPE deviceId)
|
||||||
: ConvolutionEngineFactory<ElemType>(deviceId)
|
: ConvolutionEngineFactory<ElemType>(deviceId)
|
||||||
|
@ -22,9 +38,9 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
public:
|
public:
|
||||||
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override;
|
Tensor4DPtr CreateTensor(size_t w, size_t h, size_t c, size_t n) override;
|
||||||
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override;
|
FilterPtr CreateFilter(size_t w, size_t h, size_t c, size_t k) override;
|
||||||
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
|
ConvDescPtr CreateConvDescriptor(const Tensor4D& inT, const Filter& filterT,
|
||||||
size_t wStride, size_t hStride, bool padding) override;
|
size_t wStride, size_t hStride, bool padding) override;
|
||||||
PoolDescPtr CreatePoolDescriptor(PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;
|
PoolDescPtr CreatePoolDescriptor(typename PoolDesc::PoolKind kind, size_t w, size_t h, size_t wStride, size_t hStride, size_t wPad, size_t hPad) override;
|
||||||
|
|
||||||
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override;
|
ConvEnginePtr CreateConvEngine(size_t maxTempMemSizeInSamples) override;
|
||||||
PoolEnginePtr CreatePoolEngine() override;
|
PoolEnginePtr CreatePoolEngine() override;
|
||||||
|
|
|
@ -32,6 +32,10 @@ have_cub=no
|
||||||
cub_path=
|
cub_path=
|
||||||
cub_check=cub/cub.cuh
|
cub_check=cub/cub.cuh
|
||||||
|
|
||||||
|
have_cudnn=no
|
||||||
|
cudnn_path=
|
||||||
|
cudnn_check=cuda/include/cudnn.h
|
||||||
|
|
||||||
have_opencv=no
|
have_opencv=no
|
||||||
opencv_path=
|
opencv_path=
|
||||||
opencv_check=include/opencv2/opencv.hpp
|
opencv_check=include/opencv2/opencv.hpp
|
||||||
|
@ -49,6 +53,7 @@ default_cudas="cuda-7.5 cuda-7.0 cuda-6.5"
|
||||||
default_kaldis="kaldi-trunk"
|
default_kaldis="kaldi-trunk"
|
||||||
default_gdks=". gdk/usr"
|
default_gdks=". gdk/usr"
|
||||||
default_cubs="cub-1.4.1"
|
default_cubs="cub-1.4.1"
|
||||||
|
default_cudnns="cudnn-4.0"
|
||||||
default_opencvs="opencv-3.0.0"
|
default_opencvs="opencv-3.0.0"
|
||||||
|
|
||||||
function default_paths ()
|
function default_paths ()
|
||||||
|
@ -115,6 +120,11 @@ function find_cub ()
|
||||||
find_dir "$default_cubs" "$cub_check"
|
find_dir "$default_cubs" "$cub_check"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Look for a cuDNN installation by handing the default cuDNN directory names
# and the cuDNN header path (used as the marker file) to find_dir.
function find_cudnn ()
{
    local candidates="$default_cudnns"
    local marker="$cudnn_check"
    find_dir "$candidates" "$marker"
}
|
||||||
|
|
||||||
function find_opencv ()
|
function find_opencv ()
|
||||||
{
|
{
|
||||||
find_dir "$default_opencvs" "$opencv_check"
|
find_dir "$default_opencvs" "$opencv_check"
|
||||||
|
@ -162,6 +172,7 @@ function show_help ()
|
||||||
echo " --with-cuda[=directory] $(show_default $(find_cuda))"
|
echo " --with-cuda[=directory] $(show_default $(find_cuda))"
|
||||||
echo " --with-cub[=directory] $(show_default $(find_cub))"
|
echo " --with-cub[=directory] $(show_default $(find_cub))"
|
||||||
echo " --with-gdk[=directory] $(show_default $(find_gdk))"
|
echo " --with-gdk[=directory] $(show_default $(find_gdk))"
|
||||||
|
echo " --with-cudnn[=directory] $(show_default $(find_cudnn))"
|
||||||
echo " --with-acml[=directory] $(show_default $(find_acml))"
|
echo " --with-acml[=directory] $(show_default $(find_acml))"
|
||||||
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
|
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
|
||||||
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
|
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
|
||||||
|
@ -278,6 +289,27 @@ do
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
|
--with-cudnn*)
    # cuDNN was requested explicitly. With no argument, fall back to the
    # default search; with an argument, require that it contains the cuDNN
    # header. Either failure aborts configuration.
    have_cudnn=yes
    # Expansions are quoted so that empty values and paths containing
    # whitespace do not break the test expressions.
    if test "x$optarg" = x
    then
        cudnn_path=$(find_cudnn)
        if test "x$cudnn_path" = x
        then
            echo "Cannot find NVIDIA cuDNN directory."
            echo "Please specify a value for --with-cudnn"
            exit 1
        fi
    else
        if test "$(check_dir "$optarg" "$cudnn_check")" = yes
        then
            cudnn_path=$optarg
        else
            echo "Invalid cuDNN directory $optarg"
            exit 1
        fi
    fi
    ;;
|
||||||
--with-acml*)
|
--with-acml*)
|
||||||
have_acml=yes
|
have_acml=yes
|
||||||
mathlib=acml
|
mathlib=acml
|
||||||
|
@ -448,6 +480,18 @@ then
|
||||||
echo Found CUB at $cub_path
|
echo Found CUB at $cub_path
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# cuDNN is optional: when CUDA is enabled and no cuDNN path has been chosen
# yet, try the default locations. A miss is only reported, not treated as an
# error, and cudnn_path stays empty.
# Expansions are quoted so that an unset enable_cuda or a path containing
# whitespace cannot turn the test into a syntax error.
if test "$enable_cuda" = yes && test "x$cudnn_path" = x
then
    cudnn_path=$(find_cudnn)
    if test "x$cudnn_path" = x ; then
        echo Cannot locate NVIDIA cuDNN directory
        echo CNTK will use default convolution engine.
    else
        echo "Found cuDNN at $cudnn_path"
    fi
fi
|
||||||
|
|
||||||
config=$build_top/Config.make
|
config=$build_top/Config.make
|
||||||
echo Generating $config
|
echo Generating $config
|
||||||
echo "#Configuration file for cntk" > $config
|
echo "#Configuration file for cntk" > $config
|
||||||
|
@ -465,6 +509,7 @@ if test $enable_cuda = yes ; then
|
||||||
echo CUDA_PATH=$cuda_path >> $config
|
echo CUDA_PATH=$cuda_path >> $config
|
||||||
echo GDK_PATH=$gdk_path >> $config
|
echo GDK_PATH=$gdk_path >> $config
|
||||||
echo CUB_PATH=$cub_path >> $config
|
echo CUB_PATH=$cub_path >> $config
|
||||||
|
echo CUDNN_PATH=$cudnn_path >> $config
|
||||||
fi
|
fi
|
||||||
if test x$kaldi_path != x ; then
|
if test x$kaldi_path != x ; then
|
||||||
echo KALDI_PATH=$kaldi_path >> $config
|
echo KALDI_PATH=$kaldi_path >> $config
|
||||||
|
|
Загрузка…
Ссылка в новой задаче