Support CUDA 10
* Move to support CUDA 10, cudnn 7.3, cub 1.8. * Fixed a bug related to "pointer to pin pointer is disallowed" #3063, which is exposed in newer version vctools. * Added workaround for a potential vs2017 15.9 bug with cntk Debug version.
This commit is contained in:
Родитель
93e10096cb
Коммит
f1781446d1
|
@ -3,7 +3,7 @@
|
|||
<Import Project="$(SolutionDir)\CNTK.Common.props" />
|
||||
<PropertyGroup>
|
||||
<CudaVersion />
|
||||
<CudaVersion Condition="Exists('$(CUDA_PATH_V9_0)') And '$(CudaVersion)' == ''">9.0</CudaVersion>
|
||||
<CudaVersion Condition="Exists('$(CUDA_PATH_V10_0)') And '$(CudaVersion)' == ''">10.0</CudaVersion>
|
||||
|
||||
<NvmlDll>%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml.dll</NvmlDll>
|
||||
<NvmlDll Condition="Exists('c:\local\nvsmi9\NVSMI\nvml.dll')">c:\local\nvsmi9\NVSMI\nvml.dll</NvmlDll>
|
||||
|
@ -110,10 +110,10 @@
|
|||
<ProtobufLib Condition="$(DebugBuild)">libprotobufd.lib</ProtobufLib>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(CudaVersion)' == '9.0'">
|
||||
<CudaPath>$(CUDA_PATH_V9_0)</CudaPath>
|
||||
<CudaRuntimeDll>cudart64_90.dll</CudaRuntimeDll>
|
||||
<CudaDlls>cublas64_90.dll;cusparse64_90.dll;curand64_90.dll;$(CudaRuntimeDll)</CudaDlls>
|
||||
<PropertyGroup Condition="'$(CudaVersion)' == '10.0'">
|
||||
<CudaPath>$(CUDA_PATH_V10_0)</CudaPath>
|
||||
<CudaRuntimeDll>cudart64_100.dll</CudaRuntimeDll>
|
||||
<CudaDlls>cublas64_100.dll;cusparse64_100.dll;curand64_100.dll;$(CudaRuntimeDll)</CudaDlls>
|
||||
|
||||
<!-- Use NvidiaCompute to define nvcc target architectures (will generate code to support them all, i.e. fat-binary, in release mode)
|
||||
In debug mode we only include cubin/PTX for 30 and rely on PTX / JIT to generate the required native cubin format
|
||||
|
@ -122,7 +122,7 @@
|
|||
<NvidiaCompute Condition="$(DebugBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30</NvidiaCompute>
|
||||
|
||||
<NvidiaCompute Condition="$(ReleaseBuild)">$(CNTK_CUDA_CODEGEN_RELEASE)</NvidiaCompute>
|
||||
<NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70</NvidiaCompute>
|
||||
<NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75</NvidiaCompute>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup>
|
||||
|
@ -158,7 +158,7 @@
|
|||
<PreprocessorDefinitions>CNTK_VERSION="$(CntkVersion)";CNTK_VERSION_BANNER="$(CntkVersionBanner)";CNTK_COMPONENT_VERSION="$(CntkComponentVersion)"</PreprocessorDefinitions>
|
||||
<!-- UWP does not use MPI -->
|
||||
<PreprocessorDefinitions Condition="!$(IsUWP)">%(PreprocessorDefinitions);HAS_MPI=1</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(CudaVersion)' == '9.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions Condition="'$(CudaVersion)' == '10.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
|
||||
|
|
|
@ -3,3 +3,16 @@
|
|||
## Highlights of this release
|
||||
* Moved to CUDA 10 for both Windows and Linux.
|
||||
* Support advanced RNN loops in ONNX export.
|
||||
|
||||
## CUDA support for CUDA 10
|
||||
|
||||
CNTK now supports CUDA 10. On Windows, this requires updating the build environment to Visual Studio 2017 v15.9.
|
||||
|
||||
To set up the build and runtime environment on Windows:
|
||||
* Install [Visual Studio 2017](https://www.visualstudio.com/downloads/). Note: going forward for CUDA 10 and beyond, it is no longer required to install and run with the specific VC Tools version 14.11.
|
||||
* Install [Nvidia CUDA 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64)
|
||||
* From PowerShell, run:
|
||||
[DevInstall.ps1](./Tools/devInstall/Windows/DevInstall.ps1)
|
||||
* Start Visual Studio 2017 and open [CNTK.sln](./CNTK.sln).
|
||||
|
||||
To set up the build and runtime environment on Linux using Docker, please build the Ubuntu 16.04 Docker image using the Dockerfiles [here](./Tools/docker). For other Linux systems, please refer to the Dockerfiles to set up the dependent libraries for CNTK.
|
6
Makefile
6
Makefile
|
@ -22,7 +22,7 @@
|
|||
# CUDA_PATH= Path to CUDA
|
||||
# If not specified, GPU will not be enabled
|
||||
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
|
||||
# defaults to /usr/local/cub-1.4.1
|
||||
# defaults to /usr/local/cub-1.8.0
|
||||
# CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists
|
||||
# CuDNN version needs to be 5.0 or higher.
|
||||
# KALDI_PATH= Path to Kaldi
|
||||
|
@ -144,8 +144,8 @@ ifdef CUDA_PATH
|
|||
endif
|
||||
|
||||
ifndef CUB_PATH
|
||||
$(info defaulting CUB_PATH to /usr/local/cub-1.4.1)
|
||||
CUB_PATH=/usr/local/cub-1.4.1
|
||||
$(info defaulting CUB_PATH to /usr/local/cub-1.8.0)
|
||||
CUB_PATH=/usr/local/cub-1.8.0
|
||||
endif
|
||||
|
||||
DEVICE = gpu
|
||||
|
|
|
@ -500,7 +500,9 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
Init(executionEngine, networkConfig, newConfig, dumpFileName, deviceId);
|
||||
// workaround for VS2017 15.9.2 Debug Win32 Access Violation error.
|
||||
wstring networkConfigWstring = networkConfig;
|
||||
Init(executionEngine, networkConfigWstring, newConfig, dumpFileName, deviceId);
|
||||
}
|
||||
|
||||
virtual ~NDLBuilder()
|
||||
|
|
|
@ -231,7 +231,7 @@ public:
|
|||
pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
|
||||
shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
|
||||
sharedInputVectors.push_back(ptr);
|
||||
stdInputs.insert(MapEntry(key, ptr.get()));
|
||||
stdInputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
|
||||
}
|
||||
|
||||
for each (auto item in outputs)
|
||||
|
@ -239,7 +239,7 @@ public:
|
|||
pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
|
||||
shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
|
||||
sharedOutputVectors.push_back(ptr);
|
||||
stdOutputs.insert(MapEntry(key, ptr.get()));
|
||||
stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
|
||||
}
|
||||
|
||||
try
|
||||
|
@ -382,13 +382,13 @@ public:
|
|||
std::vector<shared_ptr<std::vector<ElemType>>> sharedOutputVectors;
|
||||
pin_ptr<const WCHAR> inputKey = PtrToStringChars(inputNodeName);
|
||||
shared_ptr<std::vector<ElemType>> f2(featureVector);
|
||||
stdInputs.insert(MapEntry(inputKey, f2.get()));
|
||||
stdInputs.insert(MapEntry(static_cast<std::wstring>(inputKey), f2.get()));
|
||||
|
||||
pin_ptr<const WCHAR> key = PtrToStringChars(outputKey);
|
||||
// Do we have to initialize the output nodes?
|
||||
shared_ptr<std::vector<ElemType>> ptr(new std::vector<ElemType>(outputSize));
|
||||
sharedOutputVectors.push_back(ptr);
|
||||
stdOutputs.insert(MapEntry(key, ptr.get()));
|
||||
stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
|
||||
try
|
||||
{
|
||||
m_eval->Evaluate(stdInputs, stdOutputs);
|
||||
|
@ -517,7 +517,7 @@ private:
|
|||
pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
|
||||
shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
|
||||
sharedOutputVectors.push_back(ptr);
|
||||
stdOutputs.insert(MapEntry(key, ptr.get()));
|
||||
stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
|
||||
}
|
||||
|
||||
try
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#include "CommonMatrix.h"
|
||||
#include "GPUMatrix.h"
|
||||
#include "TensorOps.h" // for exp_() etc.
|
||||
#include "device_functions.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
|
|
|
@ -22,6 +22,8 @@
|
|||
#pragma comment(lib, "cudart.lib") // link CUDA runtime
|
||||
#pragma comment(lib, "cublas.lib")
|
||||
|
||||
#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
|
||||
|
||||
namespace msra { namespace cuda {
|
||||
|
||||
static int devicesallocated = -1; // -1 means not initialized
|
||||
|
|
|
@ -11,7 +11,10 @@
|
|||
#include "../CNTKv2LibraryDll/API/HalfConverter.hpp"
|
||||
|
||||
#if !defined(CPUONLY) && __has_include("cuda_fp16.h")
|
||||
#include <cuda_fp16.h> // ASSUME CUDA9
|
||||
|
||||
#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
|
||||
|
||||
#include <cuda_fp16.h> // ASSUME CUDA10
|
||||
#else
|
||||
class alignas(2) __half
|
||||
{
|
||||
|
|
|
@ -136,9 +136,9 @@ Function main
|
|||
$operation += OpScanProgram
|
||||
$operation += OpCheckVS2017
|
||||
|
||||
$operation += OpCheckCuda9
|
||||
$operation += OpNVidiaCudnn7090 -cache $localCache -targetFolder $localDir
|
||||
$operation += OpNvidiaCub174 -cache $localCache -targetFolder $localDir
|
||||
$operation += OpCheckCuda10
|
||||
$operation += OpNVidiaCudnn73100 -cache $localCache -targetFolder $localDir
|
||||
$operation += OpNvidiaCub180 -cache $localCache -targetFolder $localDir
|
||||
|
||||
$operation += OpCMake362 -cache $localCache
|
||||
$operation += OpMSMPI70 -cache $localCache
|
||||
|
|
|
@ -149,19 +149,19 @@ function OpMSMPI70SDK(
|
|||
} )
|
||||
}
|
||||
|
||||
function OpNvidiaCub174(
|
||||
function OpNvidiaCub180(
|
||||
[parameter(Mandatory=$true)][string] $cache,
|
||||
[parameter(Mandatory=$true)][string] $targetFolder)
|
||||
{
|
||||
$prodName = "NVidia CUB 1.7.4"
|
||||
$prodFile = "cub-1.7.4.zip"
|
||||
$prodSubDir = "cub-1.7.4"
|
||||
$prodName = "NVidia CUB 1.8.0"
|
||||
$prodFile = "cub-1.8.0.zip"
|
||||
$prodSubDir = "cub-1.8.0"
|
||||
$targetPath = join-path $targetFolder $prodSubDir
|
||||
$envVar = "CUB_PATH";
|
||||
$envValue = $targetPath
|
||||
$downloadSource = "https://github.com/NVlabs/cub/archive/1.7.4.zip"
|
||||
$downloadSource = "https://github.com/NVlabs/cub/archive/1.8.0.zip"
|
||||
|
||||
@( @{ShortName = "CUB174"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
|
||||
@( @{ShortName = "CUB180"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
|
||||
Verification = @( @{Function = "VerifyDirectory"; Path = "$targetPath" },
|
||||
@{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
|
||||
Download = @( @{Function = "Download"; Method = "WebRequest"; Source = $downloadSource; Destination = "$cache\$prodFile" } );
|
||||
|
@ -170,20 +170,20 @@ function OpNvidiaCub174(
|
|||
} )
|
||||
}
|
||||
|
||||
function OpNVidiaCudnn7090(
|
||||
function OpNVidiaCudnn73100(
|
||||
[parameter(Mandatory=$true)][string] $cache,
|
||||
[parameter(Mandatory=$true)][string] $targetFolder)
|
||||
{
|
||||
$prodName = "NVidia CUDNN 7.0.5 for CUDA 9.0"
|
||||
$cudnnWin = "cudnn-9.0-windows10-x64-v7.zip"
|
||||
$prodName = "NVidia CUDNN 7.3.1 for CUDA 10.0"
|
||||
$cudnnWin = "cudnn-10.0-windows10-x64-v7.3.1.20.zip"
|
||||
|
||||
$prodSubDir = "cudnn-9.0-v7.0.5"
|
||||
$prodSubDir = "cudnn-10.0-v7.3.1"
|
||||
$targetPath = join-path $targetFolder $prodSubDir
|
||||
$envVar = "CUDNN_PATH"
|
||||
$envValue = join-path $targetPath "cuda"
|
||||
$downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.0.5"
|
||||
$downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.3.1"
|
||||
|
||||
@( @{ShortName = "CUDNN7090"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
|
||||
@( @{ShortName = "CUDNN73100"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
|
||||
Verification = @( @{Function = "VerifyDirectory"; Path = $targetPath },
|
||||
@{Function = "VerifyDirectory"; Path = $envValue },
|
||||
@{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
|
||||
|
@ -308,13 +308,13 @@ function OpCheckVS2017
|
|||
} )
|
||||
}
|
||||
|
||||
function OpCheckCuda9
|
||||
function OpCheckCuda10
|
||||
{
|
||||
$programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v9.0"
|
||||
@( @{Name = "Verify Installation of NVidia Cuda 9.0"; ShortName = "PRECUDA90"; VerifyInfo = "Checking for NVidia Cuda 9.0";
|
||||
$programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v10.0"
|
||||
@( @{Name = "Verify Installation of NVidia Cuda 10.0"; ShortName = "PRECUDA100"; VerifyInfo = "Checking for NVidia Cuda 10.0";
|
||||
Verification = @( @{Function = "VerifyDirectory"; Path = $programPath },
|
||||
@{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V9_0"; Content = $programPath } );
|
||||
PreReq = @( @{Function = "PrereqInfoCuda9" } );
|
||||
@{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V10_0"; Content = $programPath } );
|
||||
PreReq = @( @{Function = "PrereqInfoCuda10" } );
|
||||
Action = @( @{Function = "StopInstallation" } )
|
||||
} )
|
||||
}
|
||||
|
|
|
@ -44,14 +44,14 @@ for more details.
|
|||
"
|
||||
}
|
||||
|
||||
function PrereqInfoCuda9(
|
||||
function PrereqInfoCuda10(
|
||||
[Parameter(Mandatory = $true)][hashtable] $table
|
||||
)
|
||||
{
|
||||
FunctionIntro $table
|
||||
Write-Warning "
|
||||
|
||||
Installation of NVidia CUDA 9.0 is a pre-requisite before installation can continue.
|
||||
Installation of NVidia CUDA 10.0 is a pre-requisite before installation can continue.
|
||||
Please check
|
||||
https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-Windows
|
||||
for more details.
|
||||
|
|
|
@ -1,16 +1,14 @@
|
|||
# Tag: nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
|
||||
# Created: 2017-11-21T06:34:14.675603521Z
|
||||
# Label: com.nvidia.build.id: 41212533
|
||||
# Label: com.nvidia.build.ref: e0edb5359ecb7bd3d86f0c9bfa18c2260b741ebb
|
||||
# Label: com.nvidia.cuda.version: 9.0.176
|
||||
# Label: com.nvidia.cudnn.version: 7.0.4.31
|
||||
# Label: com.nvidia.nccl.version: 2.1.2
|
||||
# Tag: nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
|
||||
# Created: 2018-10-22T21:14:30.605789926Z
|
||||
# Label: com.nvidia.cuda.version: 10.0.
|
||||
# Label: com.nvidia.cudnn.version: 7.3.1.20
|
||||
# Label: com.nvidia.nccl.version: 2.3.5
|
||||
#
|
||||
# To build, run from the parent with the command line:
|
||||
# docker build -t <image name> -f CNTK-GPU-Image/Dockerfile .
|
||||
|
||||
# Ubuntu 16.04.5
|
||||
FROM nvidia/cuda@sha256:33add9c50ab76b8f3a92187c0418ed600d5bea27690fda40711122fdc28ce2f4
|
||||
FROM nvidia/cuda@sha256:362e4e25aa46a18dfa834360140e91b61cdb0a3a2796c8e09dadb268b9de3f6b
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
autotools-dev \
|
||||
|
@ -85,7 +83,7 @@ RUN LIBZIP_VERSION=1.1.2 && \
|
|||
|
||||
ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH
|
||||
|
||||
RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.7.4.tar.gz | tar -C /usr/local -xzf -
|
||||
RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.8.0.tar.gz | tar -C /usr/local -xzf -
|
||||
|
||||
RUN OPENCV_VERSION=3.1.0 && \
|
||||
wget -q -O - https://github.com/Itseez/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && \
|
||||
|
|
|
@ -25,7 +25,7 @@ echo Building java.
|
|||
if not exist "%project_dir%com\microsoft\CNTK\lib\windows" mkdir "%project_dir%com\microsoft\CNTK\lib\windows"
|
||||
|
||||
if "%is_gpu%" == "true" (
|
||||
for %%x in (cublas64_90.dll cudart64_90.dll cudnn64_7.dll curand64_90.dll cusparse64_90.dll nvml.dll) do (
|
||||
for %%x in (cublas64_100.dll cudart64_100.dll cudnn64_7.dll curand64_100.dll cusparse64_100.dll nvml.dll) do (
|
||||
copy "%output_dir%/%%x" ".\com\microsoft\CNTK\lib\windows\%%x"
|
||||
echo %%x>> .\com\microsoft\CNTK\lib\windows\NATIVE_MANIFEST
|
||||
)
|
||||
|
|
|
@ -127,13 +127,13 @@ default_openblas=""
|
|||
|
||||
default_boost="boost-1.60.0"
|
||||
|
||||
default_cudas="cuda-9.0"
|
||||
default_cudas="cuda-10.0"
|
||||
default_nccls="nccl"
|
||||
default_kaldis="kaldi-trunk kaldi-c024e8aa"
|
||||
default_gdk_includes="include/nvidia/gdk cuda/include"
|
||||
default_gdk_nvml_libs="src/gdk/nvml/lib cuda/lib64/stubs"
|
||||
default_cubs="cub-1.7.4"
|
||||
default_cudnns="cudnn-7.0"
|
||||
default_cubs="cub-1.8.0"
|
||||
default_cudnns="cudnn-7.3"
|
||||
default_opencvs="opencv-3.1.0 opencv-3.0.0"
|
||||
default_protobuf="protobuf-3.1.0"
|
||||
default_libzips="libzip-1.1.2"
|
||||
|
@ -688,7 +688,7 @@ do
|
|||
then
|
||||
echo "Cannot find NVIDIA CUB directory."
|
||||
echo "Please specify a value for --with-cub"
|
||||
echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local"
|
||||
echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
|
@ -1082,7 +1082,7 @@ then
|
|||
if test x$cub_path = x ; then
|
||||
echo Cannot locate NVIDIA CUB directory
|
||||
echo GPU will be disabled
|
||||
echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local
|
||||
echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local
|
||||
enable_cuda=no
|
||||
else
|
||||
echo Found CUB at $cub_path
|
||||
|
|
Загрузка…
Ссылка в новой задаче