Support CUDA 10

* Move to support CUDA 10, cuDNN 7.3, and CUB 1.8. * Fixed a bug related to "pointer to pin pointer is disallowed" (#3063), which is exposed by newer versions of the VC tools. * Added a workaround for a potential VS2017 15.9 bug affecting the CNTK Debug build.
2018-10-24 17:34:01 -07:00 · 2018-10-24 17:34:01 -07:00 · f1781446d1
--- a/CNTK.Cpp.props
+++ b/CNTK.Cpp.props
@ -3,7 +3,7 @@
  <Import Project="$(SolutionDir)\CNTK.Common.props" />
  <PropertyGroup>
    <CudaVersion />
-    <CudaVersion Condition="Exists('$(CUDA_PATH_V9_0)') And '$(CudaVersion)' == ''">9.0</CudaVersion>
+    <CudaVersion Condition="Exists('$(CUDA_PATH_V10_0)') And '$(CudaVersion)' == ''">10.0</CudaVersion>

    <NvmlDll>%ProgramW6432%\NVIDIA Corporation\NVSMI\nvml.dll</NvmlDll>
    <NvmlDll Condition="Exists('c:\local\nvsmi9\NVSMI\nvml.dll')">c:\local\nvsmi9\NVSMI\nvml.dll</NvmlDll>
@ -110,10 +110,10 @@
    <ProtobufLib Condition="$(DebugBuild)">libprotobufd.lib</ProtobufLib>
  </PropertyGroup>

-  <PropertyGroup Condition="'$(CudaVersion)' == '9.0'">
-    <CudaPath>$(CUDA_PATH_V9_0)</CudaPath>
-    <CudaRuntimeDll>cudart64_90.dll</CudaRuntimeDll>
-    <CudaDlls>cublas64_90.dll;cusparse64_90.dll;curand64_90.dll;$(CudaRuntimeDll)</CudaDlls>
+  <PropertyGroup Condition="'$(CudaVersion)' == '10.0'">
+    <CudaPath>$(CUDA_PATH_V10_0)</CudaPath>
+    <CudaRuntimeDll>cudart64_100.dll</CudaRuntimeDll>
+    <CudaDlls>cublas64_100.dll;cusparse64_100.dll;curand64_100.dll;$(CudaRuntimeDll)</CudaDlls>

    <!-- Use NvidiaCompute to define nvcc target architectures (will generate code to support them all, i.e. fat-binary, in release mode)
    In debug mode we only include cubin/PTX for 30 and rely on PTX / JIT to generate the required native cubin format
@ -122,7 +122,7 @@
    <NvidiaCompute Condition="$(DebugBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30</NvidiaCompute>

    <NvidiaCompute Condition="$(ReleaseBuild)">$(CNTK_CUDA_CODEGEN_RELEASE)</NvidiaCompute>
-    <NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70</NvidiaCompute>
+    <NvidiaCompute Condition="$(ReleaseBuild) And '$(NvidiaCompute)'==''">compute_30,sm_30;compute_35,sm_35;compute_50,sm_50;compute_60,sm_60;compute_61,sm_61;compute_70,sm_70;compute_75,sm_75</NvidiaCompute>
  </PropertyGroup>

  <PropertyGroup>
@ -158,7 +158,7 @@
      <PreprocessorDefinitions>CNTK_VERSION="$(CntkVersion)";CNTK_VERSION_BANNER="$(CntkVersionBanner)";CNTK_COMPONENT_VERSION="$(CntkComponentVersion)"</PreprocessorDefinitions>
      <!-- UWP does not use MPI -->
      <PreprocessorDefinitions Condition="!$(IsUWP)">%(PreprocessorDefinitions);HAS_MPI=1</PreprocessorDefinitions>
-      <PreprocessorDefinitions Condition="'$(CudaVersion)' == '9.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
+      <PreprocessorDefinitions Condition="'$(CudaVersion)' == '10.0'">%(PreprocessorDefinitions);CUDA_NO_HALF;__CUDA_NO_HALF_OPERATORS__</PreprocessorDefinitions>
    </ClCompile>
  </ItemDefinitionGroup>

--- a/Documentation/current_iteration.md
+++ b/Documentation/current_iteration.md
@ -3,3 +3,16 @@
 ## Highlights of this release
 * Moved to CUDA 10 for both Windows and Linux.
 * Support advance RNN loop in ONNX export.
+
+## Support for CUDA 10
+
+CNTK now supports CUDA 10. On Windows, this requires updating the build environment to Visual Studio 2017 v15.9.
+
+To set up the build and runtime environment on Windows:
+* Install [Visual Studio 2017](https://www.visualstudio.com/downloads/). Note: for CUDA 10 and later, it is no longer required to install and run with the specific VC Tools version 14.11.
+* Install [Nvidia CUDA 10](https://developer.nvidia.com/cuda-downloads?target_os=Windows&target_arch=x86_64)
+* From PowerShell, run:
+    [DevInstall.ps1](./Tools/devInstall/Windows/DevInstall.ps1)
+* Start Visual Studio 2017 and open [CNTK.sln](./CNTK.sln).
+
+To set up the build and runtime environment on Linux using Docker, please build the Ubuntu 16.04 Docker image using the Dockerfiles [here](./Tools/docker). For other Linux systems, please refer to the Dockerfiles to set up the libraries that CNTK depends on.
--- a/6
+++ b/6
@ -22,7 +22,7 @@
 #   CUDA_PATH= Path to CUDA
 #     If not specified, GPU will not be enabled
 #   CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
-#     defaults to /usr/local/cub-1.4.1
+#     defaults to /usr/local/cub-1.8.0
 #   CUDNN_PATH= path to NVIDIA cuDNN installation so $(CUDNN_PATH)/cuda/include/cudnn.h exists
 #     CuDNN version needs to be 5.0 or higher.
 #   KALDI_PATH= Path to Kaldi
@ -144,8 +144,8 @@ ifdef CUDA_PATH
  endif

  ifndef CUB_PATH
-    $(info defaulting CUB_PATH to /usr/local/cub-1.4.1)
-    CUB_PATH=/usr/local/cub-1.4.1
+    $(info defaulting CUB_PATH to /usr/local/cub-1.8.0)
+    CUB_PATH=/usr/local/cub-1.8.0
  endif

  DEVICE = gpu
--- a/Source/ActionsLib/NDLNetworkBuilder.h
+++ b/Source/ActionsLib/NDLNetworkBuilder.h
@ -500,7 +500,9 @@ public:
            }
        }

-        Init(executionEngine, networkConfig, newConfig, dumpFileName, deviceId);
+        // workaround for VS2017 15.9.2 Debug Win32 Access Violation error.
+        wstring networkConfigWstring = networkConfig;
+        Init(executionEngine, networkConfigWstring, newConfig, dumpFileName, deviceId);
    }

    virtual ~NDLBuilder()
--- a/Source/Extensibility/EvalWrapper/EvalWrapper.cpp
+++ b/Source/Extensibility/EvalWrapper/EvalWrapper.cpp
@ -231,7 +231,7 @@ public:
                pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
                shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
                sharedInputVectors.push_back(ptr);
-                stdInputs.insert(MapEntry(key, ptr.get()));
+                stdInputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
            }

            for each (auto item in outputs)
@ -239,7 +239,7 @@ public:
                pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
                shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
                sharedOutputVectors.push_back(ptr);
-                stdOutputs.insert(MapEntry(key, ptr.get()));
+                stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
            }

            try
@ -382,13 +382,13 @@ public:
        std::vector<shared_ptr<std::vector<ElemType>>> sharedOutputVectors;
        pin_ptr<const WCHAR> inputKey = PtrToStringChars(inputNodeName);
        shared_ptr<std::vector<ElemType>> f2(featureVector);
-        stdInputs.insert(MapEntry(inputKey, f2.get()));
+        stdInputs.insert(MapEntry(static_cast<std::wstring>(inputKey), f2.get()));

        pin_ptr<const WCHAR> key = PtrToStringChars(outputKey);
        // Do we have to initialize the output nodes?
        shared_ptr<std::vector<ElemType>> ptr(new std::vector<ElemType>(outputSize));
        sharedOutputVectors.push_back(ptr);
-        stdOutputs.insert(MapEntry(key, ptr.get()));
+        stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
        try
        {
            m_eval->Evaluate(stdInputs, stdOutputs);
@ -517,7 +517,7 @@ private:
            pin_ptr<const WCHAR> key = PtrToStringChars(item.Key);
            shared_ptr<std::vector<ElemType>> ptr = CopyList(item.Value);
            sharedOutputVectors.push_back(ptr);
-            stdOutputs.insert(MapEntry(key, ptr.get()));
+            stdOutputs.insert(MapEntry(static_cast<std::wstring>(key), ptr.get()));
        }

        try
--- a/Source/Math/GPUMatrixCUDAKernels.cuh
+++ b/Source/Math/GPUMatrixCUDAKernels.cuh
@ -15,7 +15,6 @@
 #include "CommonMatrix.h"
 #include "GPUMatrix.h"
 #include "TensorOps.h" // for exp_() etc.
-#include "device_functions.h"
 #include <cuda_runtime.h>
 #include <assert.h>
 #include <float.h>
--- a/Source/Math/cudalib.cpp
+++ b/Source/Math/cudalib.cpp
@ -22,6 +22,8 @@
 #pragma comment(lib, "cudart.lib") // link CUDA runtime
 #pragma comment(lib, "cublas.lib")

+#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
+
 namespace msra { namespace cuda {

 static int devicesallocated = -1; // -1 means not initialized
--- a/Source/Math/half.hpp
+++ b/Source/Math/half.hpp
@ -11,7 +11,10 @@
 #include "../CNTKv2LibraryDll/API/HalfConverter.hpp"

 #if !defined(CPUONLY) && __has_include("cuda_fp16.h")
-#include <cuda_fp16.h> // ASSUME CUDA9
+
+#pragma warning(disable : 4505) // 'function' : unreferenced local function has been removed
+
+#include <cuda_fp16.h> // ASSUME CUDA10
 #else
 class alignas(2) __half
 {
--- a/Tools/devInstall/Windows/DevInstall.ps1
+++ b/Tools/devInstall/Windows/DevInstall.ps1
@ -136,9 +136,9 @@ Function main
        $operation += OpScanProgram
        $operation += OpCheckVS2017

-        $operation += OpCheckCuda9
-        $operation += OpNVidiaCudnn7090 -cache $localCache -targetFolder $localDir
-        $operation += OpNvidiaCub174 -cache $localCache -targetFolder $localDir
+        $operation += OpCheckCuda10
+        $operation += OpNVidiaCudnn73100 -cache $localCache -targetFolder $localDir
+        $operation += OpNvidiaCub180 -cache $localCache -targetFolder $localDir

        $operation += OpCMake362 -cache $localCache
        $operation += OpMSMPI70 -cache $localCache
--- a/Tools/devInstall/Windows/helper/Operations.ps1
+++ b/Tools/devInstall/Windows/helper/Operations.ps1
@ -149,19 +149,19 @@ function OpMSMPI70SDK(
        } )
 }

-function OpNvidiaCub174(
+function OpNvidiaCub180(
    [parameter(Mandatory=$true)][string] $cache,
    [parameter(Mandatory=$true)][string] $targetFolder)
 {
-    $prodName = "NVidia CUB 1.7.4"
-    $prodFile = "cub-1.7.4.zip"
-    $prodSubDir = "cub-1.7.4"
+    $prodName = "NVidia CUB 1.8.0"
+    $prodFile = "cub-1.8.0.zip"
+    $prodSubDir = "cub-1.8.0"
    $targetPath = join-path $targetFolder $prodSubDir
    $envVar = "CUB_PATH";
    $envValue = $targetPath
-    $downloadSource = "https://github.com/NVlabs/cub/archive/1.7.4.zip"
+    $downloadSource = "https://github.com/NVlabs/cub/archive/1.8.0.zip"

-    @( @{ShortName = "CUB174"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
+    @( @{ShortName = "CUB180"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
         Verification = @( @{Function = "VerifyDirectory"; Path = "$targetPath" },
                           @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
         Download = @( @{Function = "Download"; Method = "WebRequest"; Source = $downloadSource; Destination = "$cache\$prodFile" } );
@ -170,20 +170,20 @@ function OpNvidiaCub174(
         } )
 }

-function OpNVidiaCudnn7090(
+function OpNVidiaCudnn73100(
    [parameter(Mandatory=$true)][string] $cache,
    [parameter(Mandatory=$true)][string] $targetFolder)
 {
-    $prodName = "NVidia CUDNN 7.0.5 for CUDA 9.0"
-    $cudnnWin = "cudnn-9.0-windows10-x64-v7.zip"
+    $prodName = "NVidia CUDNN 7.3.1 for CUDA 10.0"
+    $cudnnWin = "cudnn-10.0-windows10-x64-v7.3.1.20.zip"

-    $prodSubDir =  "cudnn-9.0-v7.0.5"
+    $prodSubDir =  "cudnn-10.0-v7.3.1"
    $targetPath = join-path $targetFolder $prodSubDir
    $envVar = "CUDNN_PATH"
    $envValue = join-path $targetPath "cuda"
-    $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.0.5"
+    $downloadSource = "http://developer.download.nvidia.com/compute/redist/cudnn/v7.3.1"

-    @( @{ShortName = "CUDNN7090"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
+    @( @{ShortName = "CUDNN73100"; VerifyInfo = "Checking for $prodName in $targetPath"; ActionInfo = "Installing $prodName";
         Verification = @( @{Function = "VerifyDirectory"; Path = $targetPath },
                           @{Function = "VerifyDirectory"; Path = $envValue },
                           @{Function = "VerifyEnvironmentAndData"; EnvVar = $envVar; Content = $envValue } );
@ -308,13 +308,13 @@ function OpCheckVS2017
                        } )
 }

-function OpCheckCuda9
+function OpCheckCuda10
 {
-    $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v9.0"
-    @( @{Name = "Verify Installation of NVidia Cuda 9.0"; ShortName = "PRECUDA90"; VerifyInfo = "Checking for NVidia Cuda 9.0"; 
+    $programPath = join-path $env:ProgramFiles "NVIDIA GPU Computing Toolkit\CUDA\v10.0"
+    @( @{Name = "Verify Installation of NVidia Cuda 10.0"; ShortName = "PRECUDA100"; VerifyInfo = "Checking for NVidia Cuda 10.0";
         Verification = @( @{Function = "VerifyDirectory"; Path = $programPath },
-                           @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V9_0"; Content = $programPath } ); 
-         PreReq = @( @{Function = "PrereqInfoCuda9" } );
+                           @{Function = "VerifyEnvironmentAndData"; EnvVar = "CUDA_PATH_V10_0"; Content = $programPath } );
+         PreReq = @( @{Function = "PrereqInfoCuda10" } );
         Action = @( @{Function = "StopInstallation" } )
        } )
 }
--- a/Tools/devInstall/Windows/helper/PreRequisites.ps1
+++ b/Tools/devInstall/Windows/helper/PreRequisites.ps1
@ -44,14 +44,14 @@ for more details.
 "
 }

-function PrereqInfoCuda9(
+function PrereqInfoCuda10(
    [Parameter(Mandatory = $true)][hashtable] $table
 )
 {
    FunctionIntro $table
    Write-Warning "

-Installation of NVidia CUDA 9.0 is a pre-requisite before installation can continue.
+Installation of NVidia CUDA 10.0 is a pre-requisite before installation can continue.
 Please check 
  https://docs.microsoft.com/en-us/cognitive-toolkit/Setup-CNTK-on-Windows
 for more details.
--- a/Tools/docker/CNTK-GPU-Image/Dockerfile
+++ b/Tools/docker/CNTK-GPU-Image/Dockerfile
@ -1,16 +1,14 @@
-# Tag: nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
-# Created: 2017-11-21T06:34:14.675603521Z
-# Label: com.nvidia.build.id: 41212533
-# Label: com.nvidia.build.ref: e0edb5359ecb7bd3d86f0c9bfa18c2260b741ebb
-# Label: com.nvidia.cuda.version: 9.0.176
-# Label: com.nvidia.cudnn.version: 7.0.4.31
-# Label: com.nvidia.nccl.version: 2.1.2
+# Tag: nvidia/cuda:10.0-cudnn7-devel-ubuntu16.04
+# Created: 2018-10-22T21:14:30.605789926Z
+# Label: com.nvidia.cuda.version: 10.0.130
+# Label: com.nvidia.cudnn.version: 7.3.1.20
+# Label: com.nvidia.nccl.version: 2.3.5
 #
 # To build, run from the parent with the command line:
 # 	docker build -t <image name> -f CNTK-GPU-Image/Dockerfile .

 # Ubuntu 16.04.5
-FROM nvidia/cuda@sha256:33add9c50ab76b8f3a92187c0418ed600d5bea27690fda40711122fdc28ce2f4
+FROM nvidia/cuda@sha256:362e4e25aa46a18dfa834360140e91b61cdb0a3a2796c8e09dadb268b9de3f6b

 RUN apt-get update && apt-get install -y --no-install-recommends \
        autotools-dev \
@ -85,7 +83,7 @@ RUN LIBZIP_VERSION=1.1.2 && \

 ENV LD_LIBRARY_PATH /usr/local/lib:$LD_LIBRARY_PATH

-RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.7.4.tar.gz | tar -C /usr/local -xzf -
+RUN wget -q -O - https://github.com/NVlabs/cub/archive/1.8.0.tar.gz | tar -C /usr/local -xzf -

 RUN OPENCV_VERSION=3.1.0 && \
    wget -q -O - https://github.com/Itseez/opencv/archive/${OPENCV_VERSION}.tar.gz | tar -xzf - && \
--- a/bindings/java/Swig/post-build.cmd
+++ b/bindings/java/Swig/post-build.cmd
@ -25,7 +25,7 @@ echo Building java.
 if not exist "%project_dir%com\microsoft\CNTK\lib\windows" mkdir "%project_dir%com\microsoft\CNTK\lib\windows"

 if "%is_gpu%" == "true" (
-  for %%x in (cublas64_90.dll cudart64_90.dll cudnn64_7.dll curand64_90.dll cusparse64_90.dll nvml.dll) do (
+  for %%x in (cublas64_100.dll cudart64_100.dll cudnn64_7.dll curand64_100.dll cusparse64_100.dll nvml.dll) do (
    copy "%output_dir%/%%x" ".\com\microsoft\CNTK\lib\windows\%%x" 
    echo %%x>> .\com\microsoft\CNTK\lib\windows\NATIVE_MANIFEST
  )
--- a/10
+++ b/10
@ -127,13 +127,13 @@ default_openblas=""

 default_boost="boost-1.60.0"

-default_cudas="cuda-9.0"
+default_cudas="cuda-10.0"
 default_nccls="nccl"
 default_kaldis="kaldi-trunk kaldi-c024e8aa"
 default_gdk_includes="include/nvidia/gdk cuda/include"
 default_gdk_nvml_libs="src/gdk/nvml/lib cuda/lib64/stubs"
-default_cubs="cub-1.7.4"
-default_cudnns="cudnn-7.0"
+default_cubs="cub-1.8.0"
+default_cudnns="cudnn-7.3"
 default_opencvs="opencv-3.1.0 opencv-3.0.0"
 default_protobuf="protobuf-3.1.0"
 default_libzips="libzip-1.1.2"
@ -688,7 +688,7 @@ do
                then
                    echo "Cannot find NVIDIA CUB directory."
                    echo "Please specify a value for --with-cub"
-                    echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local"
+                    echo "NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local"
                    exit 1
                fi
            else
@ -1082,7 +1082,7 @@ then
    if test x$cub_path = x ; then
        echo Cannot locate NVIDIA CUB directory
        echo GPU will be disabled
-        echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.4.1.zip, extract the archive to /usr/local
+        echo NVIDIA CUB can be downloaded from https://github.com/NVlabs/cub/archive/1.8.0.zip, extract the archive to /usr/local
        enable_cuda=no
    else
        echo Found CUB at $cub_path