Merge branch 'master' of https://github.com/Microsoft/CNTK into amitaga/cntkv2Library

This commit is contained in:
Amit Agarwal 2016-06-15 11:11:37 -07:00
Родитель 0ec5b27404 0f9e9514dd
Коммит a9f769884a
60 изменённых файлов: 1450 добавлений и 255 удалений

4
.gitignore поставляемый
Просмотреть файл

@ -190,3 +190,7 @@ Source/CNTK/buildinfo.h$$
# Unit test output
Tests/UnitTests/ReaderTests/Control/**/*_Output.txt
Tests/UnitTests/NetworkTests/Output/
Dependencies/CNTKCustomMKL/Publish
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Linux-*.tgz
Dependencies/CNTKCustomMKL/CNTKCustomMKL-Windows-*.zip

Просмотреть файл

@ -29,6 +29,47 @@
</PropertyGroup>
<Choose>
<When Condition="Exists('$(ACML_PATH)')">
<PropertyGroup>
<MathLibraryName>ACML</MathLibraryName>
<MathIncludePath>$(ACML_PATH)\include</MathIncludePath>
<MathLibraryPath>$(ACML_PATH)\lib</MathLibraryPath>
<MathLinkLibrary>libacml_mp_dll.lib</MathLinkLibrary>
<MathDelayLoad>libacml_mp_dll.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(ACML_PATH)\lib\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)libacml_mp_dll.dll;$(OutDir)libifcoremd.dll;$(OutDir)libifportmd.dll;$(OutDir)libiomp*.dll;$(OutDir)libmmd.dll;$(OutDir)svml_dispmd.dll;</UnitTestDlls>
<MathDefine>USE_ACML</MathDefine>
</PropertyGroup>
</When>
<!-- See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl on how to configure to build CNTK with MKL -->
<When Condition="'$(CNTK_MKL)' == '1'">
<PropertyGroup>
<CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
<CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
<MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
<MathDefine>USE_MKL</MathDefine>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
<MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
<MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
</PropertyGroup>
</When>
</Choose>
<PropertyGroup Condition="'$(CudaVersion)' == '7.5'">
<CudaPath>$(CUDA_PATH_V7_5)</CudaPath>
<CudaRuntimeDll>cudart64_75.dll</CudaRuntimeDll>

8
Dependencies/CNTKCustomMKL/README-for-redistributable.txt поставляемый Normal file
Просмотреть файл

@ -0,0 +1,8 @@
This archive contains header files as well as redistributable components of
the Intel (r) Math Kernel Library (Intel (r) MKL).
Included is also a custom library created out of Intel (r) Math Kernel Library
(Intel (r) MKL) required for building the Microsoft Computational Network
Toolkit (CNTK). More details on CNTK can be found here: http://www.cntk.ai
Please see LICENSE.md for full license information.

23
Dependencies/CNTKCustomMKL/README.md поставляемый Normal file
Просмотреть файл

@ -0,0 +1,23 @@
# CNTK custom MKL
This directory contains the necessary files to create a custom Intel® Math Kernel Library (MKL)
for usage by CNTK ("CNTK custom MKL" for short).
By default, a CNTK binary with Intel® MKL support includes a prebuilt CNTK
custom MKL.
If you want to build CNTK with Intel® MKL support yourself, you can install a
prebuilt CNTK custom MKL, available for download from the [CNTK web site](https://www.cntk.ai/mkl).
See [CNTK's setup instructions](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine)
for more details.
If you want to add new Intel® MKL functions to be used by CNTK you will have to
build your own CNTK custom MKL.
This requires you to install the [Intel MKL SDK](https://software.intel.com/en-us/intel-mkl/) for your platform.
Then, in this directory,
* extend the file `headers.txt` to expose new headers,
* extend the file `functions.txt` to expose new functions, and
* use `build-linux.sh` or `build-windows.cmd` to build for your platform.
For further documentation please see the Developer Guide for the Intel® MKL, in particular
[Building Custom Shared Objects (Linux)](https://software.intel.com/en-us/node/528533) and
[Building Custom Dynamic-link Libraries (Windows)](https://software.intel.com/en-us/node/528362).

46
Dependencies/CNTKCustomMKL/build-linux.sh поставляемый Executable file
Просмотреть файл

@ -0,0 +1,46 @@
#!/bin/bash
#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
#
# Builds the CNTK custom MKL redistributable for Linux.
# Produces Publish/<version>/{x64/{parallel,sequential},include,...} and the
# release tarball CNTKCustomMKL-Linux-<version>.tgz in this directory.
#
# Requires the Intel MKL SDK; its location can be overridden through the
# MKLROOT environment variable.

# Stop on error (including failures inside pipelines), error on unset
# variables, trace commands.
set -euo pipefail
set -x

# Enter directory the script is located in
cd "$( dirname "${BASH_SOURCE[0]}" )"

# MKL install root: honor a caller-provided MKLROOT, else use the default
# SDK install location.
MKLROOT="${MKLROOT:-/opt/intel/compilers_and_libraries_2016.2.181/linux/mkl}"
MKLBUILDERROOT="$MKLROOT/tools/builder"

# Version of the CNTK custom MKL being produced (single line in version.txt)
CNTKCUSTOMMKLVERSION=$(cat version.txt)

# Start from a clean publish tree
rm -rf Publish
mkdir -p "Publish/$CNTKCUSTOMMKLVERSION/x64"

for THREADING in parallel sequential
do
    # Library name carries the first letter of the threading model (p/s)
    LIBBASENAME="libmkl_cntk_${THREADING:0:1}"
    make -f "$MKLBUILDERROOT/makefile" libintel64 \
        export=functions.txt \
        threading="$THREADING" \
        name="$LIBBASENAME" \
        MKLROOT="$MKLROOT"
    mkdir "Publish/$CNTKCUSTOMMKLVERSION/x64/$THREADING"
    mv "$LIBBASENAME.so" "Publish/$CNTKCUSTOMMKLVERSION/x64/$THREADING"
done

# The parallel flavor additionally needs the Intel OpenMP runtime
cp -p "$MKLROOT/../compiler/lib/intel64_lin/libiomp5.so" "Publish/$CNTKCUSTOMMKLVERSION/x64/parallel"

# Publish only the headers whitelisted in headers.txt
rsync -av --files-from=headers.txt "$MKLROOT/include" "Publish/$CNTKCUSTOMMKLVERSION/include"

cp -p README-for-redistributable.txt "Publish/$CNTKCUSTOMMKLVERSION/README.txt"
cp -p ../../LICENSE.md "Publish/$CNTKCUSTOMMKLVERSION"

# Package the versioned directory into the release tarball
cd Publish
tar -czf "../CNTKCustomMKL-Linux-$CNTKCUSTOMMKLVERSION.tgz" "$CNTKCUSTOMMKLVERSION"
cd ..

156
Dependencies/CNTKCustomMKL/build-windows.cmd поставляемый Normal file
Просмотреть файл

@ -0,0 +1,156 @@
@echo off
REM
REM Copyright (c) Microsoft. All rights reserved.
REM
REM Licensed under the MIT license. See LICENSE.md file in the project root
REM for full license information.
REM ==============================================================================
REM
REM Builds the CNTK custom MKL redistributable for Windows. Produces
REM Publish\<version>\x64\{parallel,sequential} DLL/import-library pairs,
REM the whitelisted include files, README and license.
echo.
echo This batch file will build a custom MKL dynamic link library for usage by CNTK.
echo.
echo Requirements:
echo - Intel MKL SDK installed on the machine
echo - MKLROOT environment variable is set to the MKL directory inside the Intel MKL SDK
echo - Visual Studio 2013 installed and included in the path
echo.
REM Delayed expansion (!var!) is needed for variables assigned inside the
REM for-loop body below.
setlocal enableextensions enabledelayedexpansion
REM Run relative to the directory this script lives in.
pushd "%~dp0"
if errorlevel 1 (
echo Could not change directory to script location.
exit /b 1
)
REM --- Validate prerequisites: MKL SDK location and build tools on PATH ---
if not defined MKLROOT (
echo Error: Environment variable MKLROOT is undefined.
exit /b 1
)
if not exist "%MKLROOT%" (
echo Error: Directory doesn't exist: "%MKLROOT%".
exit /b 1
)
set MKLBUILDERROOT=%MKLROOT%\tools\builder
if not exist "%MKLBUILDERROOT%" (
echo Error: Directory doesn't exist: "%MKLBUILDERROOT%".
exit /b 1
)
where /q nmake.exe
if errorlevel 1 (
echo Error: NMAKE.EXE not in path.
exit /b 1
)
where /q link.exe
if errorlevel 1 (
echo Error: LINK.EXE not in path.
exit /b 1
)
REM Version of the CNTK custom MKL being produced (first line of version.txt)
set /p CNTKCUSTOMMKLVERSION=<version.txt
if not defined CNTKCUSTOMMKLVERSION (
echo Cannot determine CNTK custom MKL version.
exit /b 1
)
REM --- Start from a clean working (lib) and output (Publish) tree ---
if exist lib rmdir /s /q lib
if errorlevel 1 exit /b 1
if exist Publish rmdir /s /q Publish
if errorlevel 1 exit /b 1
mkdir Publish\%CNTKCUSTOMMKLVERSION%\x64
echo.
echo Copying "%MKLBUILDERROOT%\lib".
REM The MKL custom-DLL builder expects its lib directory next to the makefile
REM invocation; copy it into the working tree.
xcopy /s /e /y /i "%MKLBUILDERROOT%\lib" lib
if errorlevel 1 (
exit /b 1
)
echo.
echo Compiling and copying libraries.
REM Build one custom DLL per threading model; the library name carries the
REM first letter of the model (mkl_cntk_p / mkl_cntk_s).
for %%t in (
parallel
sequential
) do (
set TFIRSTCHAR=%%t
set TFIRSTCHAR=!TFIRSTCHAR:~0,1!
set LIBBASENAME=mkl_cntk_!TFIRSTCHAR!
echo.
echo Calling NMAKE libintel64 export=functions.txt threading=%%t name=!LIBBASENAME! MKLROOT="%MKLROOT%".
NMAKE /f "%MKLBUILDERROOT%\makefile" ^
libintel64 ^
export=functions.txt ^
threading=%%t ^
name=!LIBBASENAME! ^
MKLROOT="%MKLROOT%"
if errorlevel 1 (
echo Error: NMAKE.exe for threading=%%t failed.
exit /b 1
)
mkdir Publish\%CNTKCUSTOMMKLVERSION%\x64\%%t
if errorlevel 1 exit /b 1
move !LIBBASENAME!.dll Publish\%CNTKCUSTOMMKLVERSION%\x64\%%t
if errorlevel 1 exit /b 1
move !LIBBASENAME!.lib Publish\%CNTKCUSTOMMKLVERSION%\x64\%%t
if errorlevel 1 exit /b 1
REM Remove remaining build byproducts (e.g. .exp) for this library.
del !LIBBASENAME!*
if errorlevel 1 exit /b 1
@REM TODO manifest?
)
echo.
echo Copying libiomp5md.dll.
REM Only the parallel flavor needs the Intel OpenMP runtime.
copy "%MKLROOT%\..\redist\intel64_win\compiler\libiomp5md.dll" Publish\%CNTKCUSTOMMKLVERSION%\x64\parallel
if errorlevel 1 (
exit /b 1
)
echo.
echo Removing LIB directory.
rmdir /s /q lib
if errorlevel 1 exit /b 1
echo.
echo Copying include files to Publish\%CNTKCUSTOMMKLVERSION%\include.
REM Publish only the headers whitelisted in headers.txt.
mkdir Publish\%CNTKCUSTOMMKLVERSION%\include
for /f %%h in (headers.txt) do (
copy "%MKLROOT%\include\%%h" Publish\%CNTKCUSTOMMKLVERSION%\include
if errorlevel 1 (
echo Failed to copy "%MKLROOT%\include\%%h".
exit /b 1
)
)
copy README-for-redistributable.txt Publish\%CNTKCUSTOMMKLVERSION%\README.txt
if errorlevel 1 (
echo Failed to copy README.
exit /b 1
)
copy ..\..\LICENSE.md Publish\%CNTKCUSTOMMKLVERSION%
if errorlevel 1 (
echo Failed to copy LICENSE.md.
exit /b 1
)
popd

17
Dependencies/CNTKCustomMKL/functions.txt поставляемый Normal file
Просмотреть файл

@ -0,0 +1,17 @@
cblas_dgemm
cblas_dasum
cblas_daxpy
cblas_dcopy
cblas_ddot
cblas_dnrm2
cblas_dscal
cblas_sasum
cblas_saxpy
cblas_scopy
cblas_sgemm
cblas_sscal
cblas_sdot
cblas_snrm2
dgesvd
sgesvd
MKL_Set_Num_Threads

31
Dependencies/CNTKCustomMKL/headers.txt поставляемый Normal file
Просмотреть файл

@ -0,0 +1,31 @@
mkl_blas.h
mkl_cblas.h
mkl_df_defines.h
mkl_df_functions.h
mkl_df_types.h
mkl_df.h
mkl_dfti.h
mkl_direct_call.h
mkl_dss.h
mkl_lapack.h
mkl_lapacke.h
mkl_pardiso.h
mkl_poisson.h
mkl_rci.h
mkl_service.h
mkl_solvers_ee.h
mkl_sparse_handle.h
mkl_spblas.h
mkl_trans.h
mkl_trig_transforms.h
mkl_types.h
mkl_version.h
mkl_vml_defines.h
mkl_vml_functions.h
mkl_vml_types.h
mkl_vml.h
mkl_vsl_defines.h
mkl_vsl_functions.h
mkl_vsl_types.h
mkl_vsl.h
mkl.h

1
Dependencies/CNTKCustomMKL/version.txt поставляемый Normal file
Просмотреть файл

@ -0,0 +1 @@
1

Просмотреть файл

@ -106,7 +106,7 @@ train = [
test = [
action = "test"
minibatchSize = 1024
minibatchSize = 1024 # reduce this if you run out of memory
evalNodeNames = ce:errs:top5Errs

Просмотреть файл

@ -0,0 +1,101 @@
# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>
RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = 0
imageLayout = "cudnn"
# Override the above as follows when running on CPU:
# deviceId = -1
command = train:test
precision = "float"
modelPath = "$ModelDir$/04_DeConv"
ndlMacros = "$ConfigDir$/Macros.ndl"
# uncomment the following line to write logs to a file
# stderr = "$OutputDir$/04_DeConv_out"
traceLevel=1
numMBsToShowResult=500
prefetch=true
# If set to true, always initialize the network on CPU, making initialization consistent across CPU and GPU targets (for testing).
initOnCPUOnly=true
#######################################
# TRAINING CONFIG #
#######################################
train = [
action = "train"
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/04_DeConv.ndl"
]
SGD = [
epochSize = 60000
minibatchSize = 32
learningRatesPerMB = 0.001
momentumPerMB = 0.9
maxEpochs = 10
]
# Note: this reader crashes if randomization is turned on.
reader = [
readerType = "UCIFastReader"
# To get the data (Train-28x28.txt) please run `python mnist_convert.py`
# from the 'AdditionalFiles' folder. See README.md for details.
file = "$DataDir$/Train-28x28.txt"
features = [
dim = 784
start = 1
]
labels = [
dim = 1
start = 0
labelDim = 10
labelMappingFile = "$DataDir$/labelsmap.txt"
]
]
]
#######################################
# TEST CONFIG #
#######################################
test = [
action = test
minibatchSize = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/04_DeConv.ndl"
]
reader = [
readerType = "UCIFastReader"
file = "$DataDir$/Test-28x28.txt"
features = [
dim = 784
start = 1
]
labels = [
dim = 1
start = 0
labelDim = 10
labelMappingFile = "$DataDir$/labelsmap.txt"
]
]
]

Просмотреть файл

@ -0,0 +1,60 @@
# macros to include
load = ndlMnistMacros
# the actual NDL that defines the network
run = DNN
ndlMnistMacros = [
imageW = 28
imageH = 28
imageC = 1
labelDim = 10
features = ImageInput(imageW, imageH, imageC, imageLayout=$imageLayout$)
featScale = Constant(0.00390625)
featScaled = Scale(featScale, features)
labels = InputValue(labelDim)
]
DNN=[
# conv1
kW1 = 5
kH1 = 5
cMap1 = 16
hStride1 = 2
vStride1 = 2
wScale1 = 0.1
bValue1 = 0
# weight[cMap1, kW1 * kH1 * inputChannels]
# Conv2DReLULayer is defined in Macros.ndl
conv1 = Conv2DReLULayer(featScaled, cMap1, 25, kW1, kH1, hStride1, vStride1, wScale1, bValue1)
# pool1
pool1W = 2
pool1H = 2
pool1hStride = 2
pool1vStride = 2
# MaxPooling is a standard NDL node.
pool1 = MaxPooling(conv1, pool1W, pool1H, pool1hStride, pool1vStride, imageLayout=$imageLayout$)
#unpool1
unpool1 = MaxUnpool(pool1, conv1, pool1W, pool1H, pool1hStride, pool1vStride)
# deconv1
lpad1 = 2
upad1 = 1
# weight[cMap1, kW1 * kH1 * inputChannels]
# DeconvReLULayer is defined in Macros.ndl
deconv1 = DeconvReLULayer(unpool1, kW1, kH1, imageC, 25, cMap1, hStride1, vStride1, lpad1, upad1, wScale1, bValue1)
mse = SquareError(featScaled, deconv1)
#err = ErrorPrediction(labels, ol)
# Special Nodes
FeatureNodes = (features)
#LabelNodes = (labels)
CriterionNodes = (mse)
#EvalNodes = (err)
#OutputNodes = (deconv1)
]

Просмотреть файл

@ -48,6 +48,10 @@ ConvND(w, inp, kW, kH, inMap, outMap, hStride, vStride) = [
c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$)
]
DeConv(w, inp, kW, kH, inMap, outMap, hStride, vStride, lpad, upad) = [
c = Convolution(w, inp, {kW, kH, inMap}, mapCount=outMap, stride={hStride, vStride, inMap}, sharing={true, true, true}, autoPadding=false, lowerPad={lpad, lpad, 0}, upperPad={upad, upad, 0}, transpose=1, imageLayout=$imageLayout$)
]
Conv2DReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue) = [
w = ConvW(outMap, inWCount, wScale)
b = ConvB(outMap, bValue)
@ -84,6 +88,17 @@ ConvBNReLULayer(inp, outMap, inWCount, kW, kH, hStride, vStride, wScale, bValue,
y = RectifiedLinear(c)
]
DeconvReLULayer(inp, kW, kH, inMap, inWCount, outMap, hStride, vStride, lpad, upad, wScale, bValue) = [
# No bias here.
w = ConvW(outMap, inWCount, wScale)
act = RectifiedLinear(inp)
out = DeConv(w, act, kW, kH, inMap, outMap, hStride, vStride, lpad, upad)
]
MaxNDPooling(inp, kW, kH, hStride, vStride) = [
p = Pooling(inp, "max", {kW, kH, 1}, stride={hStride, vStride, 1}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$)
]
p = Pooling(inp, "max", {kW, kH, 1}, stride={hStride, vStride, 1}, autoPadding={true, true, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$)
]
MaxUnpool(inp, poolInp, kW, kH, hStride, vStride) = [
up = MaxUnpooling(inp, poolInp, {kW, kH, 1}, stride={hStride, vStride, 1}, autoPadding={false, false, false}, lowerPad=0, upperPad=0, imageLayout=$imageLayout$)
]

Просмотреть файл

@ -0,0 +1,69 @@
'
</s>
<s/>
<s>
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
~AA
~AE
~AH
~AO
~AW
~AY
~B
~CH
~D
~DH
~EH
~ER
~EY
~F
~G
~HH
~IH
~IY
~JH
~K
~L
~M
~N
~NG
~OW
~OY
~P
~R
~S
~SH
~T
~TH
~UH
~UW
~V
~W
~Y
~Z
~ZH

Просмотреть файл

@ -12,7 +12,11 @@ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLI
This project is based on or incorporates material from the projects listed below (Third Party IP). The original copyright notice and the license under which Microsoft received such Third Party IP, are set forth below. Such licenses and notices are provided for informational purposes only. Where permitted, Microsoft licenses the Third Party IP to you under the licensing terms for the Microsoft product. Microsoft reserves all other rights not expressly granted under this agreement, whether by implication, estoppel or otherwise.
### a. BOOST C++ LIBRARIES
### a. INTEL (R) MATH KERNEL LIBRARY (INTEL (R) MKL)
CNTK distribution contains Redistributable components of Intel (r) Math Kernel Library (Intel (r) MKL)
### b. BOOST C++ LIBRARIES
Copyright Beman Dawes, David Abrahams, 1998-2005.
Copyright Rene Rivera 2004-2007.
@ -27,7 +31,7 @@ The copyright notices in the Software and this entire statement, including the a
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
### b. ATIS DATASETS
### c. ATIS DATASETS
CNTK distribution contains a subset of ATIS Datasets:
@ -39,23 +43,23 @@ Dahl, Deborah, et al. ATIS3 Test Data LDC95S26. Web Download. Philadelphia: Ling
Dahl, Deborah, et al. ATIS3 Training Data LDC94S19. Web Download. Philadelphia: Linguistic Data Consortium, 1994.
### c. TIMIT ACOUSTIC-PHONETIC CONTINUOUS SPEECH CORPUS
### d. TIMIT ACOUSTIC-PHONETIC CONTINUOUS SPEECH CORPUS
CNTK distribution contains a subset of TIMIT Acoustic-Phonetic Continuous Speech Corpus:
Garofolo, John, et al. TIMIT Acoustic-Phonetic Continuous Speech Corpus LDC93S1. Web Download. Philadelphia: Linguistic Data Consortium, 1993.
### d. THE PENN TREEBANK PROJECT
### e. THE PENN TREEBANK PROJECT
CNTK distribution contains a subset of the data of The Penn Treebank Project:
Marcus, Mitchell, Beatrice Santorini, and Mary Ann Marcinkiewicz. Treebank-2 LDC95T7. Web Download. Philadelphia: Linguistic Data Consortium, 1995.
### e. THE CMU AUDIO DATABASES
### f. THE CMU AUDIO DATABASES
CNTK distribution contains a subset of the CMU Audio Databases
Copyright (c) 1991-2005 Carnegie Mellon University. All rights reserved.
### f. THE MNIST DATABASE OF HANDWRITTEN DIGITS
### g. THE MNIST DATABASE OF HANDWRITTEN DIGITS
CNTK distribution contains a subset of the MNIST Database of Handwritten Digits
CNTK distribution contains a subset of the MNIST Database of Handwritten Digits

Просмотреть файл

@ -11,7 +11,11 @@
# defaults to release
# ACML_PATH= path to ACML library installation
# only needed if MATHLIB=acml
# MKL_PATH= path to MKL library installation
# MKL_PATH= path to CNTK custom MKL installation
# only needed if MATHLIB=mkl
# CNTK_CUSTOM_MKL_VERSION=2
# version for the CNTK custom MKL installation
# MKL_THREADING=parallel|sequential
# only needed if MATHLIB=mkl
# GDK_PATH= path to cuda gdk installation, so $(GDK_PATH)/include/nvidia/gdk/nvml.h exists
# defaults to /usr
@ -131,9 +135,15 @@ ifeq ("$(MATHLIB)","acml")
endif
ifeq ("$(MATHLIB)","mkl")
INCLUDEPATH += $(MKL_PATH)/mkl/include
LIBPATH += $(MKL_PATH)/compiler/lib/intel64 $(MKL_PATH)/mkl/lib/intel64 $(MKL_PATH)/compiler/lib/mic $(MKL_PATH)/mkl/lib/mic
LIBS += -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lm -liomp5 -lpthread
INCLUDEPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/include
LIBS += -lm
ifeq ("$(MKL_THREADING)","sequential")
LIBPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/x64/sequential
LIBS += -lmkl_cntk_s
else
LIBPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/x64/parallel
LIBS += -lmkl_cntk_p -liomp5 -lpthread
endif
COMMON_FLAGS += -DUSE_MKL
endif
@ -168,7 +178,7 @@ GENCODE_SM50 := -gencode arch=compute_50,code=\"sm_50,compute_50\"
# Use GCOV_PREFIX and GCOV_PREFIX_STRIP if relocating:
# For example, if the object file /user/build/foo.o was built with -fprofile-arcs, the final executable will try to create the data file
# /user/build/foo.gcda when running on the target system. This will fail if the corresponding directory does not exist and it is unable
# to create it. This can be overcome by, for example, setting the environment as GCOV_PREFIX=/target/run and GCOV_PREFIX_STRIP=1.
# to create it. This can be overcome by, for example, setting the environment as 'GCOV_PREFIX=/target/run' and 'GCOV_PREFIX_STRIP=1'.
# Such a setting will name the data file /target/run/build/foo.gcda
ifdef CNTK_CODE_COVERAGE
CXXFLAGS += -fprofile-arcs -ftest-coverage

Просмотреть файл

@ -1,6 +1,9 @@
# CNTK
## Latest news
*2016-06-15.* CNTK now supports building against a custom Intel® Math Kernel Library (MKL).
See [setup instructions](https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-your-machine) on how to set this up for your platform.
*2016-06-10.* See CNTK v.1.5 binary release announcement in the official [Microsoft Research Blog](https://blogs.msdn.microsoft.com/msr_er/2016/06/10/microsoft-improves-programming-flexibility-of-its-ai-toolkit/)
*2016-06-08.* V 1.5 Binary release
@ -10,8 +13,6 @@ CNTK v.1.5 binaries are on the [CNTK Releases page](https://github.com/Microsoft
*2016-05-19.* A 1-hour talk describing CNTK, how to use it, and how it works, has been posted at [Presentations](https://github.com/Microsoft/CNTK/wiki/Presentations).
*2016-05-16.* An example illustrating [Using CNTK with ResNet](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Miscellaneous/ImageNet/ResNet) is added to the codebase. The example contains some pre-trained models that can be used in various applications.
See [all news](https://github.com/Microsoft/CNTK/wiki/News).
## What is CNTK

Просмотреть файл

@ -38,6 +38,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\CNTK;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLibPath)</AdditionalLibraryDirectories>

Просмотреть файл

@ -292,24 +292,33 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
nodePtr = builder.FutureValue(NULL, defaultHiddenActivity, rows, timeStep, name);
}
}
else if (cnNodeType == OperationNameOf(ConvolutionNode) || cnNodeType == OperationNameOf(PoolingNode))
else if (cnNodeType == OperationNameOf(ConvolutionNode) ||
cnNodeType == OperationNameOf(PoolingNode) ||
cnNodeType == OperationNameOf(MaxUnpoolingNode))
{
if (parameter.size() != 3 && parameter.size() != 7)
if (parameter.size() != 2 && parameter.size() != 3 && parameter.size() != 7)
{
if (cnNodeType == OperationNameOf(ConvolutionNode))
{
RuntimeError("%ls: unexpected parameter count. %ls supports 2 modes: \n"
"1. 2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels,horizontalSubsample, verticalSubsample] \n"
"1. 2D convolution which takes 7 fixed parameters [weightNodeName, inputValueNodeName, kernelWidth, kernelHeight, outputChannels, horizontalSubsample, verticalSubsample] \n"
"and two optional parameters [zeroPadding = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"HWC\"|\"cudnn\"]. \n"
"2. ND convolution which takes 3 fixed parameters [weightNodeName, inputValueNodeName, kernelShape] and \n"
"9 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n"
"10 optional parameters [mapCount = [1|yourvalue], stride = [1|yourvalue], sharing = [true|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], bool transpose = [false|yourvalue], maxTempMemSizeInSamples = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n"
"For ND convolution, parameters kernelShape, mapCount, stride, sharing, autoPadding, lowerPad, upperPad can be arrays, e.g. kernelShape={5, 5, 3}",
cnNodeType.c_str(), cnNodeType.c_str());
}
else
else if (cnNodeType == OperationNameOf(PoolingNode))
{
RuntimeError("%ls: unexpected parameter count. %ls 3 fixed parameters [inputValueNodeName, poolKind, kernelShape] and \n"
"5 optional parameters stride = [1|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], imageLayout = \"cudnn\"|\"HWC\"]. \n"
"5 optional parameters stride = [1|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], imageLayout = \"cudnn\"]. \n"
"Parameters kernelShape, stride, autoPadding, lowerPad, upperPad can be arrays, e.g. kernelShape={5, 5, 3}",
cnNodeType.c_str(), cnNodeType.c_str());
}
else if (cnNodeType == OperationNameOf(MaxUnpoolingNode))
{
RuntimeError("%ls: unexpected parameter count. %ls 3 fixed parameters [inputValueNodeName, mask, kernelShape] and \n"
"5 optional parameters stride = [1|yourvalue], autoPadding = [true|yourvalue], lowerPad = [0|yourvalue], upperPad = [0|yourvalue], imageLayout = \"cudnn\"]. \n"
"Parameters kernelShape, stride, autoPadding, lowerPad, upperPad can be arrays, e.g. kernelShape={5, 5, 3}",
cnNodeType.c_str(), cnNodeType.c_str());
}
@ -317,11 +326,13 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
// setup the parameter position of children so we can hook them up later
nodeParamStart = 0;
nodeParamCount = cnNodeType == OperationNameOf(ConvolutionNode) ? 2 : 1;
nodeParamCount = (cnNodeType == OperationNameOf(ConvolutionNode) || cnNodeType == OperationNameOf(MaxUnpoolingNode))
? 2
: 1;
if (pass == ndlPassInitial)
{
if (parameter.size() == 3)
if (parameter.size() == 2 || parameter.size() == 3)
{
auto reqParams = node->GetParameters(false);
auto optParams = node->GetParameters(true);
@ -378,21 +389,19 @@ void NDLNodeEvaluatorImpl<ElemType>::Evaluate(NDLNode<ElemType>* node, const wst
ImageLayoutKind imageLayout = ImageLayoutKindFrom(node->GetOptionalParameter("imageLayout", "CHW"));
size_t maxTempMemSizeInSamples = node->GetOptionalParameter("maxTempMemSizeInSamples", "0");
auto pool = PoolKind::None;
if (cnNodeType == OperationNameOf(PoolingNode))
if (cnNodeType == OperationNameOf(MaxUnpoolingNode))
nodePtr = builder.MaxUnpooling(NULL, NULL, kernelShape, stride, autoPad, lowerPad, upperPad, imageLayout, name);
else if (cnNodeType == OperationNameOf(PoolingNode))
{
auto parm = node->GetParentScript()->ParseVariable(reqParams[1]->GetValue(), false);
pool = PoolKindFrom(wstring(parm->GetValue()));
}
if (pool == PoolKind::None)
{
nodePtr = builder.Convolution(NULL, NULL, kernelShape, mapCount, stride, sharing,
autoPad, lowerPad, upperPad, imageLayout, maxTempMemSizeInSamples, name);
auto pool = PoolKindFrom(wstring(parm->GetValue()));
nodePtr = builder.Pooling(NULL, pool, kernelShape, stride, autoPad, lowerPad, upperPad, imageLayout, name);
}
else
{
nodePtr = builder.Pooling(NULL, pool, kernelShape, stride, autoPad, lowerPad, upperPad, imageLayout, name);
bool transpose = node->GetOptionalParameter("transpose", "false");
nodePtr = builder.Convolution(NULL, NULL, kernelShape, mapCount, stride, sharing,
autoPad, lowerPad, upperPad, transpose, imageLayout, maxTempMemSizeInSamples, name);
}
}

Просмотреть файл

@ -197,6 +197,7 @@ bool CheckFunction(std::string& p_nodeType, bool* allowUndeterminedVariable)
else if (EqualInsensitive(nodeType, OperationNameOf(MatrixL1RegNode), L"L1Reg")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(MatrixL2RegNode), L"L2Reg")) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(MaxPoolingNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(MaxUnpoolingNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(MeanNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(MinusNode))) ret = true;
else if (EqualInsensitive(nodeType, OperationNameOf(NegateNode))) ret = true;

Просмотреть файл

@ -216,8 +216,11 @@ WeightedLogistic(label, probability, instanceWeight, tag='') = new ComputationNo
ReconcileDynamicAxis(dataInput, layoutInput, tag='') = new ComputationNode [ operation = 'ReconcileDynamicAxis' ; inputs = (dataInput : layoutInput) /*plus the function args*/ ]
ReconcileMBLayout = ReconcileDynamicAxis # back compat
CastAs (type, data) = ReconcileDynamicAxis (data, type) # read as CastAs<type>(data) where the cast may consist of rearranging the data w.r.t. MBLayout or broadcasting across sequence items
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
Convolution(weightNode, inputValueNode, kernelDims, mapDims = 1, stride = 1, sharing = true, autoPadding = true, lowerPad = 0, upperPad = 0, transpose=false, imageLayout='CHW', maxTempMemSizeInSamples = 0, tag='') = new ComputationNode [ operation = 'Convolution' ; inputs = (weightNode : inputValueNode); kernelShape = new TensorShape [ dims = kernelDims ] ; mapCount = new TensorShape [ dims = mapDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimSharing = new BoolVector [ items = sharing ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# ND pooling/unpooling
Pooling(input, poolKind/*'max'|'average'*/, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'Pooling' ; inputs = (input); pool = poolKind ; kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
MaxUnpooling(unpoolInput, poolInput, kernelDims, stride=1, autoPadding = true, lowerPad = 0, upperPad = 0, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxUnpooling' ; inputs = (unpoolInput : poolInput); kernelShape = new TensorShape [ dims = kernelDims ] ; strideShape = new TensorShape [ dims = stride ] ; dimPadding = new BoolVector [ items = autoPadding ] ; dimPadLower = new TensorShape [ dims = lowerPad ] ; dimPadUpper = new TensorShape [ dims = upperPad ] /*plus the function args*/ ]
# 2D pooling
MaxPooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'MaxPooling' ; inputs = input /*plus the function args*/ ]
AveragePooling(input, windowWidth, windowHeight, horizontalSubsample, verticalSubsample, imageLayout='CHW', tag='') = new ComputationNode [ operation = 'AveragePooling' ; inputs = input /*plus the function args*/ ]
ColumnwiseCrossProduct = KhatriRaoProduct // deprecated

Просмотреть файл

@ -55,6 +55,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\ActionsLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLibPath)</AdditionalLibraryDirectories>

Просмотреть файл

@ -1,6 +1,12 @@
@echo off
setlocal enableDelayedexpansion
::: Copyright (c) Microsoft. All rights reserved.
:::
::: Licensed under the MIT license. See LICENSE.md file in the project root
::: for full license information.
::: ==============================================================================
:::
::: This is called as a pre-build step for the CNTK executable.
::: It receives the build's configuration, $(Configuration), as first paramter.
::: It creates buildinfo.h, which makes version information available to the executable itself.
@ -28,9 +34,14 @@ if not errorlevel 1 (
)
)
:: For now, math lib is basically hardwired
if exist ACML_PATH (
echo #define _MATHLIB_ "acml">> buildinfo.h$$
if "%CNTK_MKL%" == "1" (
if "%CNTK_MKL_SEQUENTIAL%" == "1" (
echo #define _MATHLIB_ "mkl-sequential">> buildinfo.h$$
) else (
echo #define _MATHLIB_ "mkl">> buildinfo.h$$
)
) else (
echo #define _MATHLIB_ "acml">> buildinfo.h$$
)
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h$$

Просмотреть файл

@ -151,7 +151,7 @@ void File::Init(const wchar_t* filename, int fileOptions)
#ifdef _WIN32
// Win32 accepts forward slashes, but it seems that PathRemoveFileSpec() does not
// TODO:
// "PathCchCanonicalize does the / to \ conversion as a part of the canonicalization, its
// "PathCchCanonicalize does the / to \ conversion as a part of the canonicalization, it's
// probably a good idea to do that anyway since I suspect that the '..' characters might
// confuse the other PathCch functions" [Larry Osterman]
// "Consider GetFullPathName both for canonicalization and last element finding." [Jay Krell]

Просмотреть файл

@ -498,9 +498,9 @@ public:
// check for custom separator character
// If the opening brace is immediately followed by any of the customSeparators,
// change m_separator (inside seps) to that character.
// The parser lets you change the default separator to something else. For example the default separator for an array is usually the : (I think)
// The parser lets you change the default separator to something else. For example the default separator for an array is usually the ':' (I think)
// (12:45:23:46)
// However if you are using strings, and one of those strings contains a :, you might want to change the separator to something else:
// However if you are using strings, and one of those strings contains a ':', you might want to change the separator to something else:
// (;this;is;a;path:;c:\mydir\stuff)
//
// This will fail for

Просмотреть файл

@ -1,12 +1,20 @@
#pragma once
// This uses mpi.h which requires the Microsoft MPI SDK to be installed on Windows
// [cf. https://msdn.microsoft.com/en-us/library/bb524831(v=vs.85).aspx]
// download msmpisdk.msi at https://www.microsoft.com/en-us/download/details.aspx?id=49926 and run it
// and the MPI dev package on Linux (sudo apt-get install libopenmpi-dev openmpi-bin openmpi-doc)
// Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#ms-mpi or
// https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Linux#open-mpi for setup instructions
// of an MPI implementation on your platform.
#ifdef _MSC_VER
// Suppress warning for non-ASCII characters in MS-MPI headers
#pragma warning(push)
#pragma warning(disable : 4819) // The file contains a character that cannot be represented in the current code page (...). Save the file in Unicode format to prevent data loss
#include "mpi.h"
#pragma warning(pop)
#else
#include "mpi.h"
#endif
#pragma comment(lib, "msmpi.lib")
#include <string>
#include <array>
#include <vector>

Просмотреть файл

@ -130,6 +130,7 @@ static shared_ptr<ComputationNode<ElemType>> CreateStandardNode(const std::wstri
#if 1
else if (nodeType == OperationNameOf(LegacyReshapeNode)) return New<LegacyReshapeNode<ElemType>>(forward<_Types>(_Args)...);
#endif
else if (nodeType == OperationNameOf(MaxUnpoolingNode)) return New<MaxUnpoolingNode<ElemType>>(forward<_Types>(_Args)...);
else InvalidArgument("Attempted to instantiate undefined operation %ls.", nodeType.c_str());
}
@ -249,12 +250,12 @@ template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount,
const TensorShape& strideShape, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
{
return net.AddNodeToNetWithElemType(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
kernelShape, mapCount, strideShape,
sharing, autoPadding, lowerPad, upperPad,
imageLayout, maxTempMemSizeInSamples));
transpose, imageLayout, maxTempMemSizeInSamples));
}
template <class ElemType>
@ -314,13 +315,13 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Convo
const TensorShape& kernelShape, const TensorShape& mapCount,
const TensorShape& strideShape, const std::vector<bool>& sharing,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<ConvolutionNode<ElemType>>(net.GetDeviceId(), nodeName,
kernelShape, mapCount, strideShape,
sharing, autoPadding, lowerPad, upperPad,
imageLayout, maxTempMemSizeInSamples),
transpose, imageLayout, maxTempMemSizeInSamples),
{ weight, inputValues });
}
@ -336,6 +337,19 @@ shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::Pooli
{ inputValues });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::MaxUnpooling(const ComputationNodePtr unpoolInputValues,
const ComputationNodePtr poolInputValues,
const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout,
const std::wstring nodeName)
{
return net.AddNodeToNetAndAttachInputs(New<MaxUnpoolingNode<ElemType>>(net.GetDeviceId(), nodeName,
kernelShape, strideShape, autoPadding, lowerPad, upperPad, imageLayout),
{ unpoolInputValues, poolInputValues });
}
template <class ElemType>
shared_ptr<ComputationNode<ElemType>> ComputationNetworkBuilder<ElemType>::MaxPooling(const ComputationNodePtr inputValues,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind,

Просмотреть файл

@ -54,7 +54,7 @@ public:
ComputationNodePtr CreateSparseInputNode(const std::wstring& inputName, const TensorShape& sampleLayout, const wstring& dynamicAxisName = L"");
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples);
ComputationNodePtr CreateConvolutionNode(const std::wstring& nodeName, const size_t kernelWidth, const size_t kernelHeight, const size_t outputChannels,
const size_t horizontalSubsample, const size_t verticalSubsample,
ImageLayoutKind imageLayoutKind, const bool zeroPadding = false, const size_t maxTempMemSizeInSamples = 0);
@ -81,13 +81,19 @@ public:
const ComputationNodePtr inputValues,
const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples,
const std::wstring nodeName = L"");
ComputationNodePtr Pooling(const ComputationNodePtr inputValues,
PoolKind poolKind, const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout,
const std::wstring nodeName = L"");
ComputationNodePtr MaxUnpooling(const ComputationNodePtr unpoolInputValues,
const ComputationNodePtr poolInputValues,
const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout,
const std::wstring nodeName = L"");
ComputationNodePtr MaxPooling(const ComputationNodePtr inputValues,
const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind,
const std::wstring nodeName = L"");

Просмотреть файл

@ -41,6 +41,7 @@
<PrecompiledHeader>
</PrecompiledHeader>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLib)</AdditionalLibraryDirectories>

Просмотреть файл

@ -37,7 +37,8 @@
#define CNTK_MODEL_VERSION_6 6 // Batch norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_8
#define CNTK_MODEL_VERSION_9 9 // Transpose flag in ConvolutionNode to support deconvolution.
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_9
extern bool g_shareNodeValueMatrices;

Просмотреть файл

@ -52,15 +52,15 @@ class ConvolutionNodeBase : public ComputationNode<ElemType>
public:
ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name), m_poolKind(PoolKind::None), m_maxTempMemSizeInSamples(0)
: Base(deviceId, name), m_poolKind(PoolKind::None), m_transpose(false), m_maxTempMemSizeInSamples(0)
{
}
ConvolutionNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
PoolKind poolKind, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing),
m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind),
m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
PoolKind poolKind, bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name), m_kernelShape(kernelShape), m_mapCount(mapCount), m_stride(strideShape), m_sharing(sharing),
m_autoPad(autoPadding), m_lowerPad(lowerPad), m_upperPad(upperPad), m_poolKind(poolKind), m_transpose(transpose),
m_imageLayout(imageLayout), m_maxTempMemSizeInSamples(maxTempMemSizeInSamples)
{
}
@ -79,6 +79,7 @@ public:
fstream << (int32_t)m_poolKind;
fstream << (int32_t)m_imageLayout;
fstream << m_maxTempMemSizeInSamples;
fstream << m_transpose;
}
void Load(File& fstream, size_t modelVersion) override
@ -102,7 +103,11 @@ public:
fstream >> layout;
m_imageLayout = (ImageLayoutKind)layout;
fstream >> m_maxTempMemSizeInSamples;
}
}
if (modelVersion >= CNTK_MODEL_VERSION_9)
{
fstream >> m_transpose;
}
}
void CopyTo(ComputationNodeBasePtr nodeP, const std::wstring& newName, const CopyNodeFlags flags) const override
@ -119,64 +124,12 @@ public:
node->m_lowerPad = m_lowerPad;
node->m_upperPad = m_upperPad;
node->m_poolKind = m_poolKind;
node->m_transpose = m_transpose;
node->m_imageLayout = m_imageLayout;
node->m_maxTempMemSizeInSamples = m_maxTempMemSizeInSamples;
}
}
void BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
auto sliceOutputGrad = GradientFor(fr);
if (m_poolKind == PoolKind::None)
{
if (inputIndex == 0) // derivative with respect to the weight matrix
{
auto& grad = Input(0)->GradientAsMatrix();
auto sliceInput1Value = Input(1)->ValueFor(fr);
m_convEng->BackwardKernel(sliceOutputGrad, sliceInput1Value, grad, fr.IsAllFrames(), *m_tempMatrix);
}
else if (inputIndex == 1) // derivative with respect to the input feature
{
auto& input0 = Input(0)->ValueAsMatrix();
auto sliceInput1Grad = Input(1)->GradientFor(fr);
m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix);
}
}
else
{
Matrix<ElemType> sliceInput0Grad = Input(0)->GradientFor(fr);
Matrix<ElemType> sliceInput0Value = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
m_convEng->BackwardPooling(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad);
}
}
bool OutputUsedInComputingInputNodesGradients() const override
{
// The ConvolutionNode requires output values only for max pooling.
return m_poolKind == PoolKind::Max;
}
void ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
if (m_poolKind == PoolKind::None)
{
const Matrix<ElemType>& input0 = Input(0)->ValueAsMatrix();
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
}
else
{
const Matrix<ElemType>& input0 = Input(0)->ValueFor(fr);
m_convEng->ForwardPooling(input0, sliceOutputValue);
}
}
void DumpNodeInfo(const bool printValues, const bool printMetadata, File& fstream) const override
{
Base::DumpNodeInfo(printValues, printMetadata, fstream);
@ -195,6 +148,7 @@ protected:
TensorShape m_lowerPad;
TensorShape m_upperPad;
PoolKind m_poolKind;
bool m_transpose;
ImageLayoutKind m_imageLayout;
size_t m_maxTempMemSizeInSamples;
@ -214,6 +168,7 @@ protected: \
using Base::m_lowerPad; \
using Base::m_upperPad; \
using Base::m_poolKind; \
using Base::m_transpose; \
using Base::m_imageLayout; \
using Base::m_maxTempMemSizeInSamples; \
using Base::m_tempMatrix; \
@ -241,8 +196,8 @@ public:
}
ConvolutionNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& strideShape,
const std::vector<bool>& sharing, const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, imageLayout, maxTempMemSizeInSamples),
bool transpose, ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples)
: Base(deviceId, name, kernelShape, mapCount, strideShape, sharing, autoPadding, lowerPad, upperPad, PoolKind::None, transpose, imageLayout, maxTempMemSizeInSamples),
m_convolution2D(false)
{
}
@ -250,16 +205,16 @@ public:
const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayout,
bool zeroPadding, size_t maxTempMemSizeInSamples)
: ConvolutionNode(deviceId, name, TensorShape(kernelWidth, kernelHeight, 1), TensorShape(1, 1, outputChannels),
TensorShape(horizontalSubsample, verticalSubsample, 1), vector<bool>{true},
TensorShape(horizontalSubsample, verticalSubsample, 1), vector<bool>{true},
vector<bool>{zeroPadding}, TensorShape(0), TensorShape(0),
imageLayout, maxTempMemSizeInSamples)
false, imageLayout, maxTempMemSizeInSamples)
{
m_convolution2D = true;
}
ConvolutionNode(const ScriptableObjects::IConfigRecordPtr configp)
: ConvolutionNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelShape"), configp->Get(L"mapCount"), configp->Get(L"strideShape"),
configp->Get(L"dimSharing"), configp->Get(L"dimPadding"), configp->Get(L"dimPadLower"), configp->Get(L"dimPadUpper"),
ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
configp->Get(L"transpose"), ImageLayoutKindFrom(configp->Get(L"imageLayout")), configp->Get(L"maxTempMemSizeInSamples"))
{
AttachInputsFromConfig(configp, GetExpectedNumInputs());
}
@ -317,6 +272,48 @@ public:
}
}
void ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
const Matrix<ElemType>& input0 = Input(0)->ValueAsMatrix();
Matrix<ElemType> sliceInput1Value = Input(1)->ValueFor(fr);
if (!m_transpose)
m_convEng->Forward(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
else
{
// BackwardData adds results to the output so need to zero them out first.
// REVIEW alexeyk: should be rolled into BackwardData itself.
sliceOutputValue.SetValue(0);
m_convEng->BackwardData(sliceInput1Value, input0, sliceOutputValue, *m_tempMatrix);
}
}
void BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
auto sliceOutputGrad = GradientFor(fr);
if (inputIndex == 0) // derivative with respect to the weight matrix
{
auto& grad = Input(0)->GradientAsMatrix();
auto sliceInput1Value = Input(1)->ValueFor(fr);
if (!m_transpose)
m_convEng->BackwardKernel(sliceOutputGrad, sliceInput1Value, grad, fr.IsAllFrames(), *m_tempMatrix);
else
m_convEng->BackwardKernel(sliceInput1Value, sliceOutputGrad, grad, fr.IsAllFrames(), *m_tempMatrix);
}
else if (inputIndex == 1) // derivative with respect to the input feature
{
auto& input0 = Input(0)->ValueAsMatrix();
auto sliceInput1Grad = Input(1)->GradientFor(fr);
if (!m_transpose)
m_convEng->BackwardData(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix);
else
{
// REVIEW alexeyk: Forward overwrites values in sliceInput1Grad. Should handle correctly instead.
m_convEng->Forward(sliceOutputGrad, input0, sliceInput1Grad, *m_tempMatrix);
}
}
}
void Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
@ -324,6 +321,8 @@ public:
size_t inputIdx = GetExpectedNumInputs() - 1;
TensorShape inputShape;
TensorShape outputShape;
// If 2D convolution syntax is used then some of the tensor dimensions need to be inferred.
if (m_convolution2D)
{
// Need to update some tensors with correct input dims.
@ -346,38 +345,42 @@ public:
if (isFinalValidationPass && (Input(0)->GetAsMatrixNumCols() != weightCols || Input(0)->GetAsMatrixNumRows() != mapCount))
{
LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]",
LogicError("Convolution weight matrix %ls should have dimension [%d, %d] which is [outputChannels, kernelWidth * kernelHeight * inputChannels]",
Input(0)->NodeName().c_str(), (int)mapCount, (int)weightCols);
}
auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
// ConvolveGeometry always uses CHW.
SetDims(ImageDimensions(outDims, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
}
else
{
if (m_imageLayout != ImageLayoutKind::CHW)
{
InvalidArgument(
"%ls %ls supports only cuDNN (CHW) data layout. "
"Please specify imageLayout=\"cudnn\" in %ls node in your script "
"and make sure input data layout is CHW", NodeName().c_str(), OperationName().c_str(), NodeName().c_str());
}
inputShape = GetInputSampleLayout(inputIdx);
auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
SetDims(outDims, HasMBLayout());
if (!m_transpose)
{
outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
}
else
{
// In case of transpose (deconvolution), node input (inputShape) is really the output of the convolution
// and node output (outDims) is convolution input. ConvolveGeometry does not care about deconvolutions (it does not have to).
outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
}
}
// ConvolveGeometry always uses CHW.
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
if (isFinalValidationPass)
{
if (m_convEng == nullptr)
{
auto geometry = std::make_shared<ConvolveGeometry>(inputShape, m_kernelShape, m_mapCount, m_stride,
auto geometry = std::make_shared<ConvolveGeometry>(!m_transpose ? inputShape : outputShape,
m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
m_convEng = ConvolutionEngine<ElemType>::Create(geometry, m_deviceId, m_imageLayout,
m_maxTempMemSizeInSamples, m_poolKind);
m_maxTempMemSizeInSamples, m_poolKind,
ConvolutionEngineKind::All, NodeName());
}
if (Input(0)->GetAsMatrixNumCols() != m_kernelShape.GetNumElements() ||
@ -409,11 +412,13 @@ public:
}
protected:
// Flag that indicates whether the node is created using 2D-syntax.
bool m_convolution2D;
};
// -----------------------------------------------------------------------
// PoolingNode (inputFeature)
// Performs max or average ND pooling.
// -----------------------------------------------------------------------
template <class ElemType>
@ -432,9 +437,9 @@ public:
{
}
PoolingNode(DEVICEID_TYPE deviceId, const wstring& name, PoolKind pool, const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, imageLayout, 0)
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, pool, false, imageLayout, 0)
{
}
PoolingNode(const ScriptableObjects::IConfigRecordPtr configp)
@ -447,6 +452,129 @@ public:
}
public:
void ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
const Matrix<ElemType>& input0 = Input(0)->ValueFor(fr);
m_convEng->ForwardPooling(input0, sliceOutputValue);
}
void BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
auto sliceOutputGrad = GradientFor(fr);
Matrix<ElemType> sliceInput0Grad = Input(0)->GradientFor(fr);
Matrix<ElemType> sliceInput0Value = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
m_convEng->BackwardPooling(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad);
}
bool OutputUsedInComputingInputNodesGradients() const override
{
// The PoolingNode requires output values only for max pooling.
return m_poolKind == PoolKind::Max;
}
void Validate(bool isFinalValidationPass) override
{
auto inputShape = GetInputSampleLayout(0);
ValidatePooling(inputShape, isFinalValidationPass);
if (isFinalValidationPass)
{
if (m_convEng == nullptr)
{
auto geometry = std::make_shared<ConvolveGeometry>(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
m_convEng = ConvolutionEngine<ElemType>::Create(geometry, m_deviceId, m_imageLayout,
m_maxTempMemSizeInSamples, m_poolKind,
ConvolutionEngineKind::All, NodeName());
}
}
}
protected:
void ValidatePooling(const TensorShape& inputShape, bool isFinalValidationPass)
{
Base::Validate(isFinalValidationPass);
InferMBLayoutFromInputsForStandardCase(isFinalValidationPass);
if (m_imageLayout != ImageLayoutKind::CHW)
{
InvalidArgument(
"%ls %ls supports only cuDNN (CHW) data layout. "
"Please specify imageLayout=\"cudnn\" in %ls node in your script "
"and make sure input data layout is CHW", NodeName().c_str(), OperationName().c_str(), NodeName().c_str());
}
auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
SetDims(outDims, HasMBLayout());
}
};
// -----------------------------------------------------------------------
// MaxUnpoolingNode (unpoolInputValues, poolInputValues)
// Performs "max unpooling" operation. Max unpooling mirrors the operation
// performed by max pooling node and depends on the values provided to
// the max pooling node (so unlike deconvolution operation, it is not
// completely independent). Unpooling takes 2 inputs: features to be unpooled,
// which tensor has the same shape as corresponding max pooling node output
// and inputs for the original pooling node. Unpooling node
// produces an output which has the same dimensions as input to the
// corresponding max pooling node (i.e. poolInputValues).
// TODO: need to add support for other pooling types, for example,
// average unpooling. Note that in this case, generic unpooling operation
// will take different number of inputs depending on pooling type.
// -----------------------------------------------------------------------
template <class ElemType>
class MaxUnpoolingNode : public ConvolutionNodeBase<ElemType>, public NumInputs<2>
{
typedef ConvolutionNodeBase<ElemType> Base;
UsingConvolutionNodeBaseMembers;
static const std::wstring TypeName() { return L"MaxUnpooling"; }
public:
MaxUnpoolingNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name)
{
}
MaxUnpoolingNode(DEVICEID_TYPE deviceId, const wstring& name, const TensorShape& kernelShape, const TensorShape& strideShape,
const std::vector<bool>& autoPadding, const TensorShape& lowerPad, const TensorShape& upperPad,
ImageLayoutKind imageLayout)
: Base(deviceId, name, kernelShape, TensorShape(1), strideShape, vector<bool>{true}, autoPadding, lowerPad, upperPad, PoolKind::Max, true, imageLayout, 0)
{
}
MaxUnpoolingNode(const ScriptableObjects::IConfigRecordPtr configp)
: MaxUnpoolingNode(configp->Get(L"deviceId"), L"<placeholder>", configp->Get(L"kernelShape"),
configp->Get(L"strideShape"), configp->Get(L"dimPadding"), configp->Get(L"dimPadLower"), configp->Get(L"dimPadUpper"),
ImageLayoutKindFrom(configp->Get(L"imageLayout")))
{
AttachInputsFromConfig(configp, GetExpectedNumInputs());
}
public:
void ForwardProp(const FrameRange& fr) override
{
const Matrix<ElemType>& unpoolInput = Input(0)->ValueFor(fr);
const Matrix<ElemType>& poolInput = Input(1)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
m_convEng->MaxUnpooling(unpoolInput, poolInput, sliceOutputValue);
}
void BackpropTo(const size_t inputIndex, const FrameRange& fr) override
{
if (inputIndex != 0)
return;
auto sliceOutputGrad = GradientFor(fr);
Matrix<ElemType> sliceInput0Grad = Input(0)->GradientFor(fr);
// BUGBUG: ForwardPooling overwrites values in sliceInput1Grad. Should handle correctly instead.
m_convEng->ForwardPooling(sliceOutputGrad, sliceInput0Grad);
}
bool OutputUsedInComputingInputNodesGradients() const override { return false; }
void Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
@ -461,18 +589,22 @@ public:
}
auto inputShape = GetInputSampleLayout(0);
auto outDims = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
SetDims(outDims, HasMBLayout());
// Same as in case of deconvolution, node input (inputShape) is really the output of the max pooling
// and node output (outDims) is pooling input.
auto outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
SetDims(outputShape, HasMBLayout());
if (isFinalValidationPass)
{
if (m_convEng == nullptr)
{
auto geometry = std::make_shared<ConvolveGeometry>(inputShape, m_kernelShape, m_mapCount, m_stride,
auto geometry = std::make_shared<ConvolveGeometry>(outputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
// Create reference engine as it's the only engine that implements unpooling.
m_convEng = ConvolutionEngine<ElemType>::Create(geometry, m_deviceId, m_imageLayout,
m_maxTempMemSizeInSamples, m_poolKind);
m_maxTempMemSizeInSamples, m_poolKind,
ConvolutionEngineKind::Reference,
NodeName());
}
}
}
@ -491,20 +623,20 @@ class PoolingNodeBase : public ComputationNode<ElemType>, public NumInputs<1>
public:
PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name),
m_windowWidth(SIZE_MAX),
m_windowHeight(SIZE_MAX),
m_horizontalSubsample(SIZE_MAX),
m_verticalSubsample(SIZE_MAX),
m_imageLayoutKind(ImageLayoutKind::HWC)
m_windowWidth(SIZE_MAX),
m_windowHeight(SIZE_MAX),
m_horizontalSubsample(SIZE_MAX),
m_verticalSubsample(SIZE_MAX),
m_imageLayoutKind(ImageLayoutKind::HWC)
{
}
PoolingNodeBase(DEVICEID_TYPE deviceId, const wstring& name, const size_t windowWidth, const size_t windowHeight, const size_t horizontalSubsample, const size_t verticalSubsample, ImageLayoutKind imageLayoutKind)
: Base(deviceId, name),
m_windowWidth(windowWidth),
m_windowHeight(windowHeight),
m_horizontalSubsample(horizontalSubsample),
m_verticalSubsample(verticalSubsample),
m_imageLayoutKind(imageLayoutKind)
m_windowWidth(windowWidth),
m_windowHeight(windowHeight),
m_horizontalSubsample(horizontalSubsample),
m_verticalSubsample(verticalSubsample),
m_imageLayoutKind(imageLayoutKind)
{
}
PoolingNodeBase(const ScriptableObjects::IConfigRecordPtr configp)
@ -517,8 +649,8 @@ public:
void Save(File& fstream) const override
{
Base::Save(fstream);
uint32_t imageLayoutKind = (uint32_t) m_imageLayoutKind;
uint32_t windowWidth = (uint32_t) m_windowWidth;
uint32_t imageLayoutKind = (uint32_t)m_imageLayoutKind;
uint32_t windowWidth = (uint32_t)m_windowWidth;
fstream << windowWidth << imageLayoutKind << m_windowHeight << m_horizontalSubsample << m_verticalSubsample;
}
@ -551,6 +683,14 @@ public:
}
}
void ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceInput0Value = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
m_convEng->ForwardPooling(sliceInput0Value, sliceOutputValue);
}
void BackpropTo(const size_t /*inputIndex*/, const FrameRange& fr) override
{
Matrix<ElemType> sliceInput0Grad = Input(0)->GradientFor(fr);
@ -562,14 +702,6 @@ public:
m_convEng->BackwardPooling(sliceOutputValue, sliceOutputGrad, sliceInput0Value, sliceInput0Grad);
}
void ForwardProp(const FrameRange& fr) override
{
Matrix<ElemType> sliceInput0Value = Input(0)->ValueFor(fr);
Matrix<ElemType> sliceOutputValue = ValueFor(fr);
m_convEng->ForwardPooling(sliceInput0Value, sliceOutputValue);
}
void Validate(bool isFinalValidationPass) override
{
Base::Validate(isFinalValidationPass);
@ -686,7 +818,11 @@ public:
{
Base::Validate(isFinalValidationPass);
if (isFinalValidationPass && m_convEng == nullptr)
m_convEng = ConvolutionEngine<ElemType>::Create(m_geometry, m_deviceId, m_imageLayoutKind, 0, PoolKind::Max);
{
m_convEng = ConvolutionEngine<ElemType>::Create(m_geometry, m_deviceId, m_imageLayoutKind,
0, PoolKind::Max,
ConvolutionEngineKind::All, NodeName());
}
}
};
@ -722,7 +858,11 @@ public:
{
Base::Validate(isFinalValidationPass);
if (isFinalValidationPass && m_convEng == nullptr)
m_convEng = ConvolutionEngine<ElemType>::Create(m_geometry, m_deviceId, m_imageLayoutKind, 0, PoolKind::Average);
{
m_convEng = ConvolutionEngine<ElemType>::Create(m_geometry, m_deviceId, m_imageLayoutKind,
0, PoolKind::Average,
ConvolutionEngineKind::All, NodeName());
}
}
};

Просмотреть файл

@ -83,7 +83,7 @@ protected:
// - ranges of neighbor frames as a secondary tensor dimension (i.e. can be used to implement a rolling window)
// - full support/efficiency of non-recurrent use (in which case the range can be from negative to positive, e.g. a symmetric rolling window)
// - denoting which tensor dimension to loop over (this may not be completed, but I will plant a seed)
// - support for Yongqiangs sub-minibatching with truncated BPTT (export/import state)
// - support for Yongqiang's sub-minibatching with truncated BPTT (export/import state)
// - more efficient storage of carried-over state (only store the needed frames, not a full copy of the previous MB as currently; which will on the other hand also allow windows that reach back beyond a minibatch)
// -----------------------------------------------------------------------

Просмотреть файл

@ -90,7 +90,7 @@ template <class ElemType>
// "LogPlus": softmax
// f(x) = log(sum_i exp x_i), hence gradient is:
// df / dx_i = 1 / (sum_j exp x_j) * exp x_i = (Softmax(x))_i = exp(x_i ReduceLogPlus(x))
// df / dx_i = 1 / (sum_j exp x_j) * exp x_i = (Softmax(x))_i = exp(x_i - ReduceLogPlus(x))
// targetGradient = gradientFromTop .* Exp (inputValue - outputValue) --TODO: verify
// i.e. compute dfference if input and output, then Exp in-place. No, would need temp memory. So needs its own opcode AddScaledExpOfDiff(). Ternary.

Просмотреть файл

@ -55,8 +55,9 @@
<TargetName>EvalDll</TargetName>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\SGDLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\SequenceTrainingLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\CNTK\BrainScript;$(SolutionDir)Source\ActionsLib;$(MSMPI_INC);$(NvmlInclude)</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\Math;$(MSMPI_LIB64);$(SolutionDir)$(Platform)\$(Configuration);$(NvmlLibPath)</AdditionalLibraryDirectories>
@ -153,4 +154,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -4315,6 +4315,51 @@ void CPUMatrix<ElemType>::MaxPoolingBackward(const CPUMatrix<ElemType>& out, con
}
}
template <class ElemType>
void CPUMatrix<ElemType>::MaxUnpooling(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices,
const CPUMatrix<int>& indices, const CPUMatrix<ElemType>& poolInput,
CPUMatrix<ElemType>& input) const
{
#pragma omp parallel for
for (int64_t sample = 0; sample < (int64_t)GetNumCols(); sample++)
{
for (size_t row = 0; row < GetNumRows(); row++)
{
int colBase = mpRowCol(row, 0);
assert(0 <= colBase && colBase < input.GetNumRows());
int i0 = mpRowIndices(row, 0);
int size = indices(i0++, 0);
assert(size > 0);
ElemType curMax = poolInput(colBase + indices(i0, 0), sample);
ElemType prevMax = curMax;
int imax = 0;
for (int i = 1; i < size; i++)
{
int dcol = indices(i0 + i, 0);
assert(0 <= colBase + dcol && colBase + dcol < poolInput.GetNumRows());
curMax = std::max(curMax, poolInput(colBase + dcol, sample));
if (curMax > prevMax)
{
prevMax = curMax;
imax = i;
}
}
int dcol = indices(i0 + imax, 0);
assert(0 <= colBase + dcol && colBase + dcol < input.GetNumRows());
input(colBase + dcol, sample) = (*this)(row, sample);
//int i = (int)poolIn(row, sample);
//assert(0 <= i && i < size);
//int dcol = indices(i0 + i, 0);
//assert(0 <= colBase + dcol && colBase + dcol < input.GetNumRows());
//input(colBase + dcol, sample) = (*this)(row, sample);
}
}
}
template <class ElemType>
void CPUMatrix<ElemType>::AveragePoolingForward(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices, CPUMatrix<ElemType>& output) const
{

Просмотреть файл

@ -367,6 +367,7 @@ public:
void MaxPoolingBackward(const CPUMatrix<ElemType>& out, const CPUMatrix<ElemType>& in,
const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices,
CPUMatrix<ElemType>& grad) const;
void MaxUnpooling(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices, const CPUMatrix<ElemType>& poolInput, CPUMatrix<ElemType>& input) const;
void AveragePoolingForward(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices, CPUMatrix<ElemType>& output) const;
void AveragePoolingBackward(const CPUMatrix<int>& mpRowCol, const CPUMatrix<int>& mpRowIndices, const CPUMatrix<int>& indices,

Просмотреть файл

@ -11,6 +11,16 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// -----------------------------------------------------------------------
// The file contains CUDA kernels that are used in reference convolution
// engine. All these kernels look very similar as they use the same
// idea of precomputed maps described in ConvolveGeometry.h
// That is, 'mpRowCol' maps each convolution output to the start of the
// input. 'mpRowIwht', 'mpRowRun' and 'runs' provide maps that allow
// to get indices of the active weight when applying the convolution.
// See ConvolveGeometry.h (MpRowCol, MpRowIwht etc) for more details.
// -----------------------------------------------------------------------
template <typename ElemType>
__global__ void kConvolutionForward(int batchSize, const ElemType* __restrict__ kernel,
const int* mpRowCol, const int* mpRowIwht,
@ -203,6 +213,53 @@ __global__ void kMaxPoolingBackward(int batchSize, const ElemType* out, const El
}
}
template <typename ElemType>
__global__ void kMaxUnpooling(int batchSize, const int* mpRowCol, const int* mpRowIndices, const int* indices,
const ElemType* __restrict__ src, const ElemType* poolIn, int srcVecSize,
ElemType* dst, int dstVecSize)
{
int row = blockIdx.x * blockDim.x + threadIdx.x;
if (row >= srcVecSize)
return;
src += blockIdx.y * srcVecSize;
poolIn += blockIdx.y * dstVecSize;
dst += blockIdx.y * dstVecSize;
for (int sample = blockIdx.y; sample < batchSize; sample += gridDim.y)
{
int colBase = mpRowCol[row];
assert(0 <= colBase && colBase < dstVecSize);
int i0 = mpRowIndices[row];
int size = indices[i0++];
ElemType curMax = poolIn[colBase + indices[i0]];
ElemType prevMax = curMax;
int imax = 0;
for (int i = 1; i < size; i++)
{
int dcol = indices[i0 + i];
assert(0 <= colBase + dcol && colBase + dcol < dstVecSize);
curMax = max(curMax, poolIn[colBase + dcol]);
if (curMax > prevMax)
{
prevMax = curMax;
imax = i;
}
}
int dcol = indices[i0 + imax];
assert(0 <= colBase + dcol && colBase + dcol < dstVecSize);
dst[colBase + dcol] = src[row];
src += blockIdx.y * srcVecSize;
poolIn += blockIdx.y * dstVecSize;
dst += blockIdx.y * dstVecSize;
}
}
template <typename ElemType>
__global__ void kAveragePoolingForward(int batchSize, const int* mpRowCol, const int* mpRowIndices, const int* indices,
const ElemType* __restrict__ src, int srcVecSize,

Просмотреть файл

@ -107,6 +107,26 @@ void ConvolutionEngine<ElemType>::BackwardPooling(const Mat& out, const Mat& src
BackwardPoolingCore(out, srcGrad, in, grad);
}
template <class ElemType>
void ConvolutionEngine<ElemType>::MaxUnpooling(const Mat& out, const Mat& poolIn, Mat& in)
{
const auto& g = *m_geometry;
assert(g.InputShape().GetNumElements() == in.GetNumRows());
assert(g.InputShape().GetNumElements() == poolIn.GetNumRows());
assert(g.OutputShape().GetNumElements() == out.GetNumRows());
size_t batchSize = in.GetNumCols();
assert(batchSize == out.GetNumCols());
assert(batchSize == poolIn.GetNumCols());
#ifdef NDEBUG
UNUSED(g);
UNUSED(batchSize);
#endif
EnsureCompatible();
EnsurePoolingInitialized();
MaxUnpoolingCore(out, poolIn, in);
}
//------------------------------------------------------------------
// Reference convolution engine implementation.
// This engine supports arbitrary convolution geometry but does not provide efficient implementation.
@ -210,6 +230,11 @@ protected:
InvalidArgument("Pooling type %d is not supported.", (int)m_poolKind);
}
void MaxUnpoolingCore(const Mat& out, const Mat& poolIn, Mat& in) override
{
out.MaxUnpooling(m_mpRowCol, *m_mpRowIndices, *m_indices, poolIn, in);
}
protected:
static bool IsGpu(DEVICEID_TYPE deviceId)
{
@ -500,6 +525,15 @@ protected:
InvalidArgument("Pooling type %d is not supported.", (int)m_poolKind);
}
void MaxUnpoolingCore(const Mat& out, const Mat& poolIn, Mat& in) override
{
UNUSED(out);
UNUSED(poolIn);
UNUSED(in);
// Not implemented but potentially can make a fallback to reference engine.
LogicError("MaxUnpooling is not implemented for legacy engine.");
}
private:
ImageDimensions m_inT;
ImageDimensions m_outT;
@ -816,8 +850,11 @@ public:
template <class ElemType>
std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create(ConvolveGeometryPtr geometry, DEVICEID_TYPE deviceId,
ImageLayoutKind imageLayout, size_t maxTempMemSizeInSamples, PoolKind poolKind,
ConvolutionEngineKind enabledEngines)
ConvolutionEngineKind enabledEngines, std::wstring logPrefix)
{
if (!logPrefix.empty())
logPrefix += L": ";
auto isEnabled = [=](ConvolutionEngineKind eng) { return ((int)enabledEngines & (int)eng) != 0; };
// Note: in some cases do not throw exception even if parameters do not match as Create
// can be called from places like MEL with default parameters and never be used.
@ -829,7 +866,7 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (!isEnabled(ConvolutionEngineKind::Legacy))
RuntimeError("Trying to use Legacy convolution engine when it's disabled.");
// REVIEW alexeyk: should honor m_traceLevel here.
fprintf(stderr, "\nUsing legacy convolution engine for geometry: %s.\n", engStr.c_str());
fprintf(stderr, "\n%lsusing legacy convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<LegacyConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
@ -837,19 +874,19 @@ std::unique_ptr<ConvolutionEngine<ElemType>> ConvolutionEngine<ElemType>::Create
if (isEnabled(ConvolutionEngineKind::CuDnn) &&
CuDnnConvolutionEngineFactory<ElemType>::IsSupported(deviceId, geometry, poolKind))
{
fprintf(stderr, "\nUsing cuDNN convolution engine for geometry: %s.\n", engStr.c_str());
fprintf(stderr, "\n%lsusing cuDNN convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return CuDnnConvolutionEngineFactory<ElemType>::Create(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (isEnabled(ConvolutionEngineKind::Gemm) && GemmConvolutionEngine<ElemType>::IsSupported(deviceId, geometry))
{
fprintf(stderr, "\nUsing GEMM convolution engine for geometry: %s.\n", engStr.c_str());
fprintf(stderr, "\n%lsusing GEMM convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<GemmConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}
if (!isEnabled(ConvolutionEngineKind::Reference))
RuntimeError("Reference convolution is disabled and no other engine supports such configuratin (or disabled).");
fprintf(stderr, "\nUsing reference convolution engine for geometry: %s.\n", engStr.c_str());
fprintf(stderr, "\n%lsusing reference convolution engine for geometry: %s.\n", logPrefix.c_str(), engStr.c_str());
return std::make_unique<ReferenceConvolutionEngine<ElemType>>(geometry, deviceId, imageLayout, maxTempMemSizeInSamples, poolKind);
}

Просмотреть файл

@ -55,10 +55,14 @@ public:
void BackwardPooling(const Mat& out, const Mat& srcGrad, const Mat& in, Mat& grad);
void MaxUnpooling(const Mat& out, const Mat& poolIn, Mat& in);
std::shared_ptr<const ConvolveGeometry> Geometry() const { return m_geometry; }
static std::unique_ptr<ConvolutionEngine<ElemType>> Create(ConvolveGeometryPtr geometry, DEVICEID_TYPE deviceId, ImageLayoutKind imageLayout,
size_t maxTempMemSizeInSamples, PoolKind poolKind = PoolKind::None, ConvolutionEngineKind enabledEngines = ConvolutionEngineKind::All);
size_t maxTempMemSizeInSamples, PoolKind poolKind = PoolKind::None,
ConvolutionEngineKind enabledEngines = ConvolutionEngineKind::All,
std::wstring logPrefix = L"");
DISABLE_COPY_AND_MOVE(ConvolutionEngine);
@ -91,6 +95,8 @@ protected:
virtual void BackwardPoolingCore(const Mat& out, const Mat& srcGrad, const Mat& in, Mat& grad) = 0;
virtual void MaxUnpoolingCore(const Mat& out, const Mat& poolIn, Mat& in) = 0;
protected:
ConvolveGeometryPtr m_geometry;
DEVICEID_TYPE m_deviceId;

Просмотреть файл

@ -21,6 +21,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// arbitrary configurations and dimensions. In such case the generic implementation becomes very simple and invariant
// wrt convolution configuration and dimensionality. For specific cases like 2D/3D convolutions and full sharing,
// highly optimized implementations (e.g. cuDNN) are used.
// TODO: rename to ConvolutionGeometry
class ConvolveGeometry final
{
public:
@ -426,6 +427,7 @@ public:
return -(center - (kernSize - 1) / 2);
}
// Computes output shape given input shape and other convolution parameters.
static TensorShape ComputeOutputShape(const TensorShape& inputShape, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& stride,
const BoolVec& sharing, const BoolVec& autoPad, const TensorShape& lowerPad, const TensorShape& upperPad)
{
@ -492,6 +494,69 @@ public:
return dimsOut;
}
// Computes input shape given output shape and other convolution parameters.
// Used in deconvolution operation.
static TensorShape ComputeInputShape(const TensorShape& outputShape, const TensorShape& kernelShape, const TensorShape& mapCount, const TensorShape& stride,
const BoolVec& sharing, const BoolVec& autoPad, const TensorShape& lowerPad, const TensorShape& upperPad)
{
if (outputShape.GetRank() != kernelShape.GetRank())
InvalidArgument("Convolution output and kernel tensors must have the same rank.");
if (mapCount.GetRank() != 1 && outputShape.GetRank() != mapCount.GetRank())
InvalidArgument("Convolution map tensor must have rank 1 or the same as the output tensor.");
if (stride.GetRank() != 1 && outputShape.GetRank() != stride.GetRank())
InvalidArgument("Convolution stride tensor must have rank 1 or the same as the output tensor.");
if (sharing.size() != 1 && outputShape.GetRank() != sharing.size())
InvalidArgument("Convolution sharing tensor must have rank 1 or the same as the output tensor.");
if (autoPad.size() != 1 && outputShape.GetRank() != autoPad.size())
InvalidArgument("Convolution padding tensor must have rank 1 or the same as the output tensor.");
if (lowerPad.GetRank() != 1 && outputShape.GetRank() != lowerPad.GetRank())
InvalidArgument("Convolution lower pad tensor must have rank 1 or the same as the output tensor.");
if (upperPad.GetRank() != 1 && outputShape.GetRank() != upperPad.GetRank())
InvalidArgument("Convolution upper pad tensor must have rank 1 or the same as the output tensor.");
SmallVector<size_t> dimsInput(outputShape.GetRank());
for (size_t i = 0; i < outputShape.GetRank(); i++)
{
assert(outputShape[i] >= 1);
size_t delta = stride[stride.GetRank() == 1 ? 0 : i];
size_t dim = outputShape[i];
// Input dimension does not include output map count.
size_t curMapCount = 1;
if (mapCount.size() > 1)
curMapCount = mapCount[i];
else if (i == outputShape.GetRank() - 1)
curMapCount = mapCount[0];
assert((dim % curMapCount) == 0);
dim /= curMapCount;
bool autoPadCur = autoPad[autoPad.size() == 1 ? 0 : i];
size_t lo = lowerPad[lowerPad.size() == 1 ? 0 : i];
size_t hi = upperPad[upperPad.size() == 1 ? 0 : i];
size_t dimIn = (dim - 1) * delta;
// We need to be able to restore any input size from the output, not just the one
// that does not require padding. For example, if output is 14, stride 2 and
// desired input is 28 then padded input will be 31. In this case if autopadding is enabled,
// the input will 27 as (27 - 1) / 2 + 1 == 14.
if (autoPadCur)
dimIn += 1;
else
dimIn += (int64_t)kernelShape[i] - (lo + hi);
// When LowerPad and/or UpperPad are specified (i.e. > 0), we insist that the kernel applications
// fill the entire space.
if (!autoPadCur && (lo > 0 || hi > 0))
{
size_t size = (dimIn - kernelShape[i] + lo + hi) / delta + 1;
if (size != dim)
InvalidArgument("Convolution requires that kernel fills the entire space if auto-padding is disabled.");
}
dimsInput[i] = dimIn;
}
return TensorShape(dimsInput);
}
// Used in unit tests and during debugging.
operator std::string() const
{

Просмотреть файл

@ -306,6 +306,15 @@ protected:
m_inT, ptr(in), &C::One, m_inT, ptr(grad)));
}
void MaxUnpoolingCore(const Mat& out, const Mat& poolIn, Mat& in) override
{
UNUSED(out);
UNUSED(poolIn);
UNUSED(in);
// Not implemented but potentially can make a fallback to reference engine.
LogicError("MaxUnpooling is not implemented for cuDNN engine.");
}
private:
using C = Consts<ElemType>;

Просмотреть файл

@ -3074,6 +3074,17 @@ void GPUMatrix<ElemType>::MaxPoolingBackward(const GPUMatrix<ElemType>& out, con
Data(), (int)GetNumRows(), grad.Data(), (int)grad.GetNumRows());
}
template <class ElemType>
void GPUMatrix<ElemType>::MaxUnpooling(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, const GPUMatrix<ElemType>& poolInput, GPUMatrix<ElemType>& input) const
{
const int BlockSize = 128;
auto gdim = dim3((GetNumRows() + BlockSize - 1)/ BlockSize, std::min((int)GetNumCols(), 65535));
PrepareDevice();
SyncGuard syncGuard;
kMaxUnpooling<<<gdim, BlockSize, 0, t_stream>>>((int)GetNumCols(), mpRowCol.Data(), mpRowIndices.Data(), indices.Data(),
Data(), poolInput.Data(), (int)GetNumRows(), input.Data(), (int)input.GetNumRows());
}
template <class ElemType>
void GPUMatrix<ElemType>::AveragePoolingForward(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, GPUMatrix<ElemType>& output) const
{
@ -3137,6 +3148,7 @@ void GPUMatrix<ElemType>::BatchNormalizationForward(const GPUMatrix<ElemType>& s
if (blendFactor > 0)
{
// REVIEW alexeyk: can be rolled into NormalizeBatchTraining to save bandwidth.
// TODO: add a 'beta' parameter to ScaleAndAdd()
Scale((ElemType)(1 - blendFactor), saveMean);
ScaleAndAdd((ElemType)blendFactor, runMean, saveMean);
Scale((ElemType)(1 - blendFactor), saveInvStdDev);

Просмотреть файл

@ -445,6 +445,7 @@ public:
void MaxPoolingBackward(const GPUMatrix<ElemType>& out, const GPUMatrix<ElemType>& in,
const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices,
GPUMatrix<ElemType>& grad) const;
void MaxUnpooling(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, const GPUMatrix<ElemType>& poolInput, GPUMatrix<ElemType>& input) const;
void AveragePoolingForward(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, GPUMatrix<ElemType>& output) const;
void AveragePoolingBackward(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, GPUMatrix<ElemType>& grad) const;

Просмотреть файл

@ -1,5 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
@ -31,7 +31,6 @@
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v120</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)" Label="Configuration">
<ConfigurationType>DynamicLibrary</ConfigurationType>
@ -39,8 +38,6 @@
<PlatformToolset>v120</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseIntelMKL>No</UseIntelMKL>
<UseIntelIPP>false</UseIntelIPP>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings" />
@ -55,17 +52,18 @@
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(ACML_PATH)\include;$(SolutionDir)Source\Common\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>$(MathIncludePath);$(SolutionDir)Source\Common\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(ACML_PATH)\lib;$(OutDir)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>$(MathLibraryPath);$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="$(DebugBuild)">
<ClCompile>
<PrecompiledHeader>NotUsing</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<PreprocessorDefinitions>USE_ACML; NO_SYNC; WIN32; _DEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>$(MathDefine); NO_SYNC; WIN32; _DEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<Optimization>Disabled</Optimization>
@ -77,13 +75,13 @@
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalDependencies>libacml_mp_dll.lib;Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>libacml_mp_dll.dll; $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
<AdditionalDependencies>$(MathLinkLibrary);Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>$(MathDelayLoad); $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" "$(OutputPath)"</Command>
<Message>Copying ACML DLLs</Message>
<Command>xcopy /D /I /Y "$(MathPostBuildCopyPattern)" "$(OutDir)"</Command>
<Message>Copying $(MathLibraryName) DLLs</Message>
</PostBuildEvent>
<CudaCompile>
<TargetMachinePlatform>64</TargetMachinePlatform>
@ -103,7 +101,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>USE_ACML; NO_SYNC; WIN32; NDEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>$(MathDefine); NO_SYNC; WIN32; NDEBUG; _WINDOWS; _USRDLL; MATH_EXPORTS; %(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>true</SDLCheck>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
@ -119,13 +117,13 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<AdditionalDependencies>libacml_mp_dll.lib;Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>$(MathLinkLibrary);Common.lib;%(AdditionalDependencies)</AdditionalDependencies>
<DelayLoadDLLs>$(MathDelayLoad); $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
<Profile>true</Profile>
<DelayLoadDLLs>libacml_mp_dll.dll; $(CudaDlls); %(DelayLoadDLLs)</DelayLoadDLLs>
</Link>
<PostBuildEvent>
<Command>xcopy /D /I /Y "$(ACML_PATH)\lib\*.dll" "$(OutputPath)"</Command>
<Message>Copying ACML DLLs</Message>
<Command>xcopy /D /I /Y "$(MathPostBuildCopyPattern)" "$(OutDir)"</Command>
<Message>Copying $(MathLibraryName) DLLs</Message>
</PostBuildEvent>
<CudaCompile>
<FastMath>true</FastMath>
@ -155,7 +153,7 @@
<PreprocessorDefinitions>CPUONLY;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>
<Link>
<DelayLoadDLLs>libacml_mp_dll.dll</DelayLoadDLLs>
<DelayLoadDLLs>$(MathDelayLoad)</DelayLoadDLLs>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
@ -167,9 +165,9 @@
<ClInclude Include="ConvolutionEngine.h" />
<ClInclude Include="ConvolveGeometry.h" />
<ClInclude Include="CPUMatrix.h" />
<ClInclude Include="CPURNGHandle.h" />
<ClInclude Include="CPURNGHandle.h" />
<ClInclude Include="MatrixQuantizerImpl.h" />
<ClInclude Include="RNGHandle.h" />
<ClInclude Include="RNGHandle.h" />
<ClInclude Include="TensorOps.h" />
<ClInclude Include="TensorView.h" />
<None Include="GPUWatcher.cu" />
@ -193,7 +191,7 @@
<ItemGroup>
<ClCompile Include="BatchNormalizationEngine.cpp" />
<ClCompile Include="ConvolutionEngine.cpp" />
<ClCompile Include="CPURNGHandle.cpp" />
<ClCompile Include="CPURNGHandle.cpp" />
<ClCompile Include="CPUSparseMatrix.cpp" />
<ClCompile Include="CUDAPageLockedMemAllocator.cpp" />
<ClCompile Include="dllmain.cpp">
@ -207,7 +205,7 @@
<ClCompile Include="NoGPU.cpp" />
<ClCompile Include="Matrix.cpp" />
<ClCompile Include="QuantizedMatrix.cpp" />
<ClCompile Include="RNGHandle.cpp" />
<ClCompile Include="RNGHandle.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
@ -218,4 +216,8 @@
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets" />
</Project>
<Target Name="CheckDependencies">
<Error Condition="'$(CNTK_MKL)' == '1' And !Exists('$(CNTKCustomMKLPath)')" Text="CNTK custom MKL not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
<Error Condition="'$(CNTK_MKL)' != '1' And !Exists('$(ACML_PATH)')" Text="ACML not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#acml for instructions." />
</Target>
</Project>

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -65,6 +65,7 @@
<AdditionalIncludeDirectories>$(SolutionDir)Source\Common\include;$(CudaInclude);$(CUB_PATH);$(CuDnnIncPath)</AdditionalIncludeDirectories>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<FloatingPointModel>Fast</FloatingPointModel>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalDependencies>$(CudaLibs);%(AdditionalDependencies)</AdditionalDependencies>
@ -82,7 +83,7 @@
<FastMath>true</FastMath>
<GPUDebugInfo>false</GPUDebugInfo>
<GPUDebugInfo Condition="'$(CNTK_CUDA_DEVICE_DEBUGINFO)'=='1'">true</GPUDebugInfo>
<AdditionalOptions>-Xcudafe "--diag_suppress=field_without_dll_interface" %(AdditionalOptions)</AdditionalOptions>
<AdditionalOptions>-Xcudafe "--diag_suppress=field_without_dll_interface" -Xcompiler "/wd 4819" %(AdditionalOptions)</AdditionalOptions>
</CudaCompile>
<PostBuildEvent>
<Command>for %%l in ($(CudaDlls)) do if exist "$(CudaPath)\bin\%%l" xcopy /D /Y "$(CudaPath)\bin\%%l*" "$(OutputPath)"
@ -198,4 +199,4 @@ if exist "$(CuDnnDll)" xcopy /D /Y "$(CuDnnDll)" "$(OutputPath)"
<Error Condition="!Exists('$(CUB_PATH)')" Text="CNTK requires the NVIDIA CUB library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
<Error Condition="!Exists('$(CUDNN_PATH)')" Text="CNTK requires the NVIDIA cuDNN library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#cuda for installation instructions." />
</Target>
</Project>
</Project>

Просмотреть файл

@ -4207,6 +4207,31 @@ void Matrix<ElemType>::MaxPoolingBackward(const Matrix<ElemType>& out, const Mat
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::MaxUnpooling(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, const Matrix<ElemType>& poolInput, Matrix<ElemType>& input) const
{
assert(mpRowCol.GetNumCols() == 1);
assert(mpRowIndices.GetNumCols() == 1);
assert(indices.GetNumCols() == 1);
DecideAndMoveToRightDevice(*this, input);
// REVIEW alexeyk: setting values to zero may cause inconsistency when negative values are unpooled.
// To see why, let's assume we have just one input with negative value and output of, for example, 2x2.
// As a result of unpooling, there will be 3 zero values and one negative. If we now apply max pooling
// operation to the output then we get 0 as the output, not the original negative value.
// In practice this will not happen as pooling layers usually go right after ReLU layer.
input.SetValue(0);
// REVIEW alexeyk: add sparse version.
DISPATCH_MATRIX_ON_FLAG(this,
this,
m_CPUMatrix->MaxUnpooling(*(mpRowCol.m_CPUMatrix), *(mpRowIndices.m_CPUMatrix), *(indices.m_CPUMatrix), *(poolInput.m_CPUMatrix), *(input.m_CPUMatrix)),
m_GPUMatrix->MaxUnpooling(*(mpRowCol.m_GPUMatrix), *(mpRowIndices.m_GPUMatrix), *(indices.m_GPUMatrix), *(poolInput.m_GPUMatrix), *(input.m_GPUMatrix)),
NOT_IMPLEMENTED,
NOT_IMPLEMENTED);
}
template <class ElemType>
void Matrix<ElemType>::AveragePoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const
{

Просмотреть файл

@ -495,6 +495,7 @@ public:
void MaxPoolingBackward(const Matrix<ElemType>& out, const Matrix<ElemType>& in,
const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices,
Matrix<ElemType>& grad) const;
void MaxUnpooling(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, const Matrix<ElemType>& poolInput, Matrix<ElemType>& input) const;
void AveragePoolingForward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& output) const;
void AveragePoolingBackward(const Matrix<int>& mpRowCol, const Matrix<int>& mpRowIndices, const Matrix<int>& indices, Matrix<ElemType>& grad) const;

Просмотреть файл

@ -1809,6 +1809,11 @@ void GPUMatrix<ElemType>::MaxPoolingBackward(const GPUMatrix<ElemType>& out, con
{
}
template <class ElemType>
void GPUMatrix<ElemType>::MaxUnpooling(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, const GPUMatrix<ElemType>& poolInput, GPUMatrix<ElemType>& input) const
{
}
template <class ElemType>
void GPUMatrix<ElemType>::AveragePoolingForward(const GPUMatrix<int>& mpRowCol, const GPUMatrix<int>& mpRowIndices, const GPUMatrix<int>& indices, GPUMatrix<ElemType>& output) const
{

Просмотреть файл

@ -71,6 +71,7 @@
<SDLCheck>true</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<OpenMPSupport>true</OpenMPSupport>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -146,4 +147,4 @@ if "$(UseZip)" == "true" if exist "$(ZLIB_PATH)\bin\zlib1.dll" (xcopy /I /D /Y "
<Warning Condition="!$(HasOpenCV)" Text="ImageReader requires the OpenCV library to build. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#opencv for installation instructions." />
<Warning Condition="!$(UseZip)" Text="zlib and libzip libraries were not found, ImageReader will be built without zip container support. Please see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#libzip for installation instructions." />
</Target>
</Project>
</Project>

Просмотреть файл

@ -8,6 +8,7 @@
#include "SpecialPurposeNodes.h" // for SequenceWithSoftmaxNode
#include "DataReaderHelpers.h"
#include "MatrixQuantizerImpl.h"
#ifdef CNTK_PARALLEL_TRAINING_SUPPORT
//static inline bool operator==(const std::pair<double,size_t>& a, double b) { assert(b==0); return a.first == b; }
// ^^ workaround until this line in AggregateGradientsImpl() gets updated: assert(headerCPU->evalErrors[i] == 0);
@ -2355,7 +2356,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
m_minibatchSearchCriterionErrorMargin = configAALR(L"minibatchSearchCriterionErrorMargin", (size_t) 1);
// the number of minibatches used to search
// the learning rate. It’s typically set to 10-20% of
// the learning rate. It's typically set to 10-20% of
// the total minibatches in an epoch.
m_numMiniBatch4LRSearch = configAALR(L"numMiniBatch4LRSearch", ConfigRecordType::Array(intargvector(vector<int>{500})));

Просмотреть файл

@ -43,6 +43,7 @@
</PrecompiledHeader>
<PreprocessorDefinitions>WIN32;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions Condition="'$(CNTK_ENABLE_1BitSGD)'=='true'">QUANTIZED_GRADIENT_AGGREGATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(MSMPI_LIB64);$(OutDir);$(NvmlLibPath)</AdditionalLibraryDirectories>

Просмотреть файл

@ -52,6 +52,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\Math;$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)</AdditionalLibraryDirectories>
@ -109,4 +110,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

Просмотреть файл

@ -68,6 +68,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)..;$(BOOST_LIB_PATH)</AdditionalLibraryDirectories>
@ -158,13 +159,13 @@
<CuDnnDll Condition="$(GpuBuild) And Exists('$(OutDir)..\cudnn64_4.dll')">$(OutDir)..\cudnn64_4.dll</CuDnnDll>
</PropertyGroup>
<ItemGroup>
<UnitTestDependencies Include="$(OutDir)CNTK.Core.BS;$(OutDir)..\evaldll.dll;$(OutDir)..\Math.dll;$(OutDir)..\libacml_mp_dll.dll;$(OutDir)..\libifcoremd.dll;$(OutDir)..\libifportmd.dll;$(OutDir)..\libiomp*.dll;$(OutDir)..\libmmd.dll;$(OutDir)..\svml_dispmd.dll;" />
<UnitTestDependencies Include="$(OutDir)CNTK.Core.BS;$(OutDir)..\evaldll.dll;$(OutDir)..\Math.dll;$(UnitTestDlls)" />
</ItemGroup>
<ItemGroup Condition="$(GpuBuild)">
<UnitTestDependencies Include="$(OutDir)CNTK.Core.BS;$(OutDir)..\evaldll.dll;$(OutDir)..\cuda*.dll;$(OutDir)..\svml_dispmd.dll;$(CuDnnDll);$(UnitTestDependencies)" />
<UnitTestDependencies Include="$(OutDir)CNTK.Core.BS;$(OutDir)..\evaldll.dll;$(OutDir)..\cuda*.dll;$(CuDnnDll);$(UnitTestDependencies)" />
</ItemGroup>
<Copy SourceFiles="@(UnitTestDependencies)" DestinationFolder="$(OutDir)" SkipUnchangedFiles="true">
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />
</Copy>
</Target>
</Project>
</Project>

Просмотреть файл

@ -175,7 +175,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionForward)
std::generate(begin(data) + r * c, begin(data) + 2 * r * c, [&] { return nd(rng); });
buf.SetValue(r, 3 * c, buf.GetDeviceId(), data.data());
// Get center slice.
return buf.ColumnSlice(c, c).DeepClone();
return buf.ColumnSlice(c, c);
};
int baseDeviceId = 0;
@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionForward)
std::string emsg;
BOOST_REQUIRE_MESSAGE(!out.HasNan("out"), "out" << msgNan);
BOOST_REQUIRE_MESSAGE(CheckEqual(out, outB, emsg, relErr * 4, absErr * 8), "out" << msg << ". " << emsg);
BOOST_REQUIRE_MESSAGE(CheckEqual(out, outB, emsg, relErr * 4, absErr * 9), "out" << msg << ". " << emsg);
BOOST_REQUIRE_MESSAGE(CountNans(outBuf) == crowOut * 2 * n, "out" << msgNotNan);
}
}
@ -243,7 +243,7 @@ BOOST_AUTO_TEST_CASE(ConvolutionBackwardData)
std::generate(begin(data) + r * c, begin(data) + 2 * r * c, [&] { return nd(rng); });
buf.SetValue(r, 3 * c, buf.GetDeviceId(), data.data());
// Get center slice.
return buf.ColumnSlice(c, c).DeepClone();
return buf.ColumnSlice(c, c);
};
int baseDeviceId = 0;
@ -380,7 +380,7 @@ BOOST_AUTO_TEST_CASE(PoolingForward)
std::generate(begin(data) + r * c, begin(data) + 2 * r * c, [&] { return nd(rng); });
buf.SetValue(r, 3 * c, buf.GetDeviceId(), data.data());
// Get center slice.
return buf.ColumnSlice(c, c).DeepClone();
return buf.ColumnSlice(c, c);
};
int baseDeviceId = 0;
@ -499,6 +499,87 @@ BOOST_AUTO_TEST_CASE(PoolingBackward)
}
}
// Validates MaxUnpooling two ways for every generated pooling geometry:
//   1) CPU and GPU reference engines must produce identical unpooled outputs.
//   2) Re-pooling the unpooled tensor must reproduce the original pooling output
//      (max pooling is a left inverse of max unpooling for the max positions).
// Buffers are allocated 3x wide with NaN padding so over/underflow writes are detectable.
BOOST_AUTO_TEST_CASE(MaxUnpooling)
{
// NOTE(review): IntMatrix appears unused in this test body — confirm before removing.
using IntMatrix = Matrix<int>;
// Fixed seed keeps the test deterministic across runs.
std::mt19937 rng(0);
std::uniform_int_distribution<> batchSizeG(1, 8);
// Using uniform distribution with positive values to avoid issues with
// unpooling negative values.
std::uniform_real_distribution<float> nd(0, 1);
// Fills 'buf' as [NaN block | random block | NaN block] and returns a view of the
// center column slice; the NaN sentinels on both sides catch out-of-bounds writes.
auto initMat = [&](SingleMatrix& buf, size_t r, size_t c, vec& data) -> SingleMatrix
{
data.resize(r * 3 * c);
std::fill(begin(data), end(data), std::numeric_limits<float>::quiet_NaN());
std::generate(begin(data) + r * c, begin(data) + 2 * r * c, [&] { return nd(rng); });
buf.SetValue(r, 3 * c, buf.GetDeviceId(), data.data());
// Get center slice.
return buf.ColumnSlice(c, c);
};
// -1 selects the CPU device, 0 the first GPU.
int cpuDeviceId = -1;
int gpuDeviceId = 0;
for (const auto& g : GeneratePoolTestConfigs())
{
// cpuEng and gpuEng are used to compare results against each other.
auto cpuEng = ConvEng::Create(g, cpuDeviceId, ImageLayoutKind::CHW, 0, PoolKind::Max, ConvolutionEngineKind::Reference);
auto gpuEng = ConvEng::Create(g, gpuDeviceId, ImageLayoutKind::CHW, 0, PoolKind::Max, ConvolutionEngineKind::Reference);
size_t n = batchSizeG(rng);
vec buf;
buf.resize(g->InputShape().GetNumElements() * n);
std::generate(begin(buf), end(buf), [&] { return nd(rng); });
// Same random input data on both devices.
SingleMatrix inC(g->InputShape().GetNumElements(), n, buf.data(), cpuDeviceId, matrixFlagNormal);
SingleMatrix inG(g->InputShape().GetNumElements(), n, buf.data(), gpuDeviceId, matrixFlagNormal);
// First, compute max pooling output and corresponding mask.
SingleMatrix outC(g->OutputShape().GetNumElements(), n, cpuDeviceId);
SingleMatrix outG(g->OutputShape().GetNumElements(), n, gpuDeviceId);
cpuEng->ForwardPooling(inC, outC);
gpuEng->ForwardPooling(inG, outG);
// Second, do the unpooling.
size_t crowIn = g->InputShape().GetNumElements();
SingleMatrix inUBufC(cpuDeviceId);
SingleMatrix inUC = initMat(inUBufC, crowIn, n, buf);
// GPU buffer starts from an identical copy of the CPU buffer so both sides unpool into the same state.
SingleMatrix inUBufG(inUBufC.DeepClone(), gpuDeviceId);
SingleMatrix inUG = initMat(inUBufG, crowIn, n, buf);
cpuEng->MaxUnpooling(outC, inC, inUC);
gpuEng->MaxUnpooling(outG, inG, inUG);
// Check that CPU/GPU results are the same.
std::stringstream tmsg;
tmsg << "Geometry: " << (std::string)(*g) << ", Batch: " << n;
std::string msg = " are not equal, " + tmsg.str();
std::string msgNan = " has NaNs, " + tmsg.str();
std::string msgNotNan = " has buffer overflow/underflow, " + tmsg.str();
// Zero tolerances: unpooling only moves values around, so results must match exactly.
float relErr = 0;
float absErr = 0;
std::string emsg;
BOOST_REQUIRE_MESSAGE(!inUC.HasNan("inUC"), "inUC" << msgNan);
BOOST_REQUIRE_MESSAGE(!inUG.HasNan("inUG"), "inUG" << msgNan);
BOOST_REQUIRE_MESSAGE(CheckEqual(inUC, inUG, emsg, relErr, absErr), "inU" << msg << ". " << emsg);
// The two NaN sentinel blocks (left and right of the center slice) must be untouched.
BOOST_REQUIRE_MESSAGE(CountNans(inUBufC) == crowIn * 2 * n, "inUBufC" << msgNotNan);
BOOST_REQUIRE_MESSAGE(CountNans(inUBufG) == crowIn * 2 * n, "inUBufG" << msgNotNan);
// Now do the pooling from unpooled source and compare with original pooling.
SingleMatrix outC_2(g->OutputShape().GetNumElements(), n, cpuDeviceId);
SingleMatrix outG_2(g->OutputShape().GetNumElements(), n, gpuDeviceId);
cpuEng->ForwardPooling(inUC, outC_2);
gpuEng->ForwardPooling(inUG, outG_2);
BOOST_REQUIRE_MESSAGE(CheckEqual(outC_2, outC, emsg, relErr, absErr), "outC_2" << msg << ". " << emsg);
BOOST_REQUIRE_MESSAGE(CheckEqual(outG_2, outG, emsg, relErr, absErr), "outG_2" << msg << ". " << emsg);
}
}
BOOST_AUTO_TEST_SUITE_END()
} } } }

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -67,6 +67,7 @@
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>$(BOOST_INCLUDE_PATH);$(SolutionDir)Source\Common\Include</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(OutDir)..;$(BOOST_LIB_PATH)</AdditionalLibraryDirectories>
@ -168,13 +169,13 @@
<CuDnnDll Condition="$(GpuBuild) And Exists('$(OutDir)..\cudnn64_4.dll')">$(OutDir)..\cudnn64_4.dll</CuDnnDll>
</PropertyGroup>
<ItemGroup>
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(OutDir)..\libacml_mp_dll.dll;$(OutDir)..\libifcoremd.dll;$(OutDir)..\libifportmd.dll;$(OutDir)..\libiomp*.dll;$(OutDir)..\libmmd.dll;$(OutDir)..\svml_dispmd.dll;" />
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(UnitTestDlls);" />
</ItemGroup>
<ItemGroup Condition="$(GpuBuild)">
<UnitTestDependencies Include="$(OutDir)..\cuda*.dll;$(OutDir)..\svml_dispmd.dll;$(CuDnnDll);$(UnitTestDependencies)" />
<UnitTestDependencies Include="$(OutDir)..\cuda*.dll;$(CuDnnDll);$(UnitTestDependencies)" />
</ItemGroup>
<Copy SourceFiles="@(UnitTestDependencies)" DestinationFolder="$(OutDir)" SkipUnchangedFiles="true">
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />
</Copy>
</Target>
</Project>
</Project>

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -58,6 +58,7 @@
<UseFullPaths>true</UseFullPaths>
<OpenMPSupport>true</OpenMPSupport>
<AdditionalIncludeDirectories>$(MSMPI_INC);$(SolutionDir)Source\Readers\ReaderLib;$(SolutionDir)Source\Common\Include;$(SolutionDir)Source\Math;$(SolutionDir)Source\ActionsLib;$(SolutionDir)Source\ComputationNetworkLib;$(SolutionDir)Source\CNTK\BrainScript;$(BOOST_INCLUDE_PATH)</AdditionalIncludeDirectories>
<DisableSpecificWarnings>4819</DisableSpecificWarnings>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
@ -134,10 +135,10 @@
</Target>
<Target Name="CopyUnitTestDependencies" AfterTargets="Build">
<ItemGroup>
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(OutDir)..\libacml_mp_dll.dll;$(OutDir)..\libifcoremd.dll;$(OutDir)..\libifportmd.dll;$(OutDir)..\libiomp*.dll;$(OutDir)..\libmmd.dll;$(OutDir)..\svml_dispmd.dll;" />
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(UnitTestDlls);" />
</ItemGroup>
<Copy SourceFiles="@(UnitTestDependencies)" DestinationFolder="$(OutDir)" SkipUnchangedFiles="true">
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />
</Copy>
</Target>
</Project>
</Project>

Просмотреть файл

@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" InitialTargets="CheckDependencies" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
@ -262,7 +262,7 @@
</PropertyGroup>
<Target Name="CopyUnitTestDependencies" AfterTargets="Build">
<ItemGroup>
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(OutDir)..\ucifastreader.dll;$(OutDir)..\cntktextformatreader.dll;$(OutDir)..\htkmlfreader.dll;$(OutDir)..\HTKDeserializers.dll;$(OutDir)..\compositedatareader.dll;$(OutDir)..\libacml_mp_dll.dll;$(OutDir)..\libifcoremd.dll;$(OutDir)..\libifportmd.dll;$(OutDir)..\libiomp*.dll;$(OutDir)..\libmmd.dll;$(OutDir)..\svml_dispmd.dll;$(ImageReaderDependencies);" />
<UnitTestDependencies Include="$(OutDir)..\Math.dll;$(OutDir)..\ucifastreader.dll;$(OutDir)..\cntktextformatreader.dll;$(OutDir)..\htkmlfreader.dll;$(OutDir)..\HTKDeserializers.dll;$(OutDir)..\compositedatareader.dll;$(UnitTestDlls);$(ImageReaderDependencies);" />
</ItemGroup>
<Copy SourceFiles="@(UnitTestDependencies)" DestinationFolder="$(OutDir)" SkipUnchangedFiles="true">
<Output TaskParameter="DestinationFiles" ItemName="NewFileWrites" />

Просмотреть файл

@ -1,4 +1,12 @@
#!/bin/bash
#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
#
# TODO --math-library support on Windows
# Setting some default values
BUILD=1
@ -9,6 +17,7 @@ RANDOM_OUTPUT=0
CODE_COVERAGE=no
FLAVORS="debug:release"
TARGETS="cpu:gpu"
MATH_LIBRARY="acml"
TESTTARGETS="cpu:gpu"
# parsing command line arguments:
@ -25,6 +34,7 @@ case $key in
echo " -b|--build-only - just build, do not run"
echo " -f|--flavors <flavor1:flavor2...> - which flavor to build (by default $FLAVORS)"
echo " -t|--targets <target1:target2...> - which target to build (by default $TARGETS)"
echo " -m|--math-library <mathlibrary> - which math library to build with (by default $MATH_LIBRARY)"
echo " -tt|--test-targets <testtarget1:testtarget2...> - which target to test (by default $TESTTARGETS)"
echo " -cc|--code-coverage - build with support for code coverage (gcov)"
echo " -cb|--clean-build - clean up the enlistment binaries before build"
@ -64,6 +74,24 @@ case $key in
TARGETS="${2,,}"
shift # past argument
;;
-m|--math-library)
# Remember the requested library (lowercased, matching how other options
# normalize $2) so diagnostics report what the user actually asked for.
MATH_LIBRARY="${2,,}"
case $MATH_LIBRARY in
acml)
MATH_LIBRARY_OPTION="--with-acml=$ACML_PATH"
;;
mkl)
MATH_LIBRARY_OPTION="--with-mkl=$MKL_PATH"
;;
mkl-sequential)
MATH_LIBRARY_OPTION="--with-mkl-sequential=$MKL_PATH"
;;
*)
# Fix: previously this echoed $MATH_LIBRARY while it still held the
# default ("acml"), hiding the invalid value the user passed.
echo "Unknown math library $MATH_LIBRARY"
exit 1
;;
esac
shift # past argument
;;
-tt|--test-targets)
TESTTARGETS="${2,,}"
shift # past argument
@ -76,7 +104,7 @@ case $key in
shift # past argument
;;
*)
echo Unkown option $key
echo Unknown option $key
exit 1
;;
esac
@ -86,7 +114,7 @@ done
# Step 0 -- Validate all necessary prerequisites and check for incompatible options
# It is possible to use this script on Windows to build CNTK
# from Cygwin window with Visual C++ environment loaded.
# In that case OS environment variable will be set and we
# In that case OS environment variable will be set and we
# can use it to differentiate from Linux.
if [[ $CLEAN_BEFORE == 1 && $RUN == 1 && $BUILD == 0 ]]; then
echo "============ ERROR: Incompatible options RUN and CLEAN_BEFORE set without BUILD ============"
@ -99,7 +127,7 @@ if [[ $OS == "Windows_NT" && $OSTYPE == "cygwin" ]]; then
PREFIX_DIR=x64
BIN_NAME=CNTK.exe
BUILD_OS="windows"
if [[ $VS120COMNTOOLS == "" ]]; then
echo "============ Visual Studio 12.0 environment not properly setup or VS not installed ============"
echo "============ Please find and run the appropriate vcvarsall.bat script ============"
@ -224,7 +252,7 @@ if [[ $BUILD == 1 ]]; then
OneBitSGDOPT=yes
fi
fi
./configure --with-build-top=$BUILD_DIR --with-acml=$ACML_PATH --with-buildtype=$FLAVOR --cuda=$CUDAOPT --with-code-coverage=$CODE_COVERAGE --1bitsgd=$OneBitSGDOPT
./configure --with-build-top=$BUILD_DIR ${MATH_LIBRARY_OPTION} --with-buildtype=$FLAVOR --cuda=$CUDAOPT --with-code-coverage=$CODE_COVERAGE --1bitsgd=$OneBitSGDOPT
if [[ $CLEAN_BEFORE == 1 ]]; then
make -C $BUILD_DIR -f $MAKEFILE clean 1>&6 2>&7 || exit $?
fi
@ -278,7 +306,7 @@ if [[ $RUN == 1 ]]; then
fi
OUT_FILE="$RUN_FILE.$FLAVOR.$TARGET.$TESTTARGET.out"
BIN_PATH=$CNTK_ROOT/$PREFIX_DIR/$FLAVOR_DIR/$BIN_NAME
BIN_PATH=$CNTK_ROOT/$PREFIX_DIR/$FLAVOR_DIR/$BIN_NAME
if ! [[ -f $BIN_PATH ]]; then
echo "============ ERROR: CNTK did not build properly for $TARGET/$FLAVOR ============"
echo "Missing file: $BIN_PATH"

Просмотреть файл

@ -1,5 +1,11 @@
#!/bin/bash
#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
#
# Description: this script is used to generated buildinfo.h in Source/CNTK
# which will contain the following information to be displayed at runtime:
# BUILDTYPE (release/debug)
@ -24,7 +30,7 @@ usage ()
echo "This script assumes git can be used"
echo "This script assumes Config.make has been made"
echo "-------------------------------------------------------------------"
if [ ! -z "$1" ] ; then
if [ ! -z "$1" ] ; then
echo "ERROR message: $1"
fi
exit 1
@ -60,20 +66,20 @@ makebuildinfo()
printf "#define _MATHLIB_ \"%s\"\n" "$MATHLIB"
printf "#define _BUILDSHA1_ \"%s\"\n" "$GIT_COMMIT"
printf "#define _BUILDBRANCH_ \"%s\"\n" "$GIT_BRANCH"
if [ -z "$CUDA_PATH" ]; then
if [ -z "$CUDA_PATH" ]; then
printf "#define _BUILDTARGET_ \"CPU-only\"\n"
else
printf "#define _BUILDTARGET_ \"GPU\"\n"
printf "#define _CUDA_PATH_ \"%s\"\n" "$CUDA_PATH"
fi
if [ ! -z "$CUB_PATH" ]; then
if [ ! -z "$CUB_PATH" ]; then
printf "#define _CUB_PATH_ \"%s\"\n" "$CUB_PATH"
fi
if [ ! -z "$CUDNN_PATH" ]; then
if [ ! -z "$CUDNN_PATH" ]; then
printf "#define _CUDNN_PATH_ \"%s\"\n" $CUDNN_PATH
fi
printf "#define _BUILDTYPE_ \"%s\"\n" "$BUILDTYPE"
if [ ! -z "$WITH_1BITSGD" ]; then
if [ ! -z "$WITH_1BITSGD" ]; then
printf "#define _WITH_1BITSGD_ \"yes\"\n"
else
printf "#define _WITH_1BITSGD_ \"no\"\n"
@ -87,48 +93,53 @@ makebuildinfo()
#//////////////////////////////////////////////////////#
# main function #
#//////////////////////////////////////////////////////#
if [ $# -ne 1 ]; then
usage
#//////////////////////////////////////////////////////#
if [ $# -ne 1 ]; then
usage
fi
config=$1
# 1. check whether we have git and what is the sha-1 value
# Check whether we have git and what is the SHA-1 value
if Has_Git; then has_git=1; else has_git=0; usage "git does not exist"; fi
GIT_STATUS=' (modified)'
git diff --quiet && git diff --cached --quiet && GIT_STATUS=''
GIT_COMMIT=`git rev-parse HEAD`$GIT_STATUS
GIT_BRANCH=`git rev-parse --abbrev-ref HEAD`
# 2. looking into Config.make
if [ ! -e $config ] ; then
# Looking into Config.make
if [ ! -e $config ] ; then
usage "Config.make not exists"
fi
source $config
# 3. whether we have CUDA_PATH
if [ -z "${CUDA_PATH+x}" ]; then
# Whether we have CUDA_PATH
if [ -z "${CUDA_PATH+x}" ]; then
CUDAPATH=""
else
CUDAPATH=$CUDA_PATH
fi
# 4. whether we have CUB_PATH
if [ -z "${CUB_PATH+x}" ]; then
# Whether we have CUB_PATH
if [ -z "${CUB_PATH+x}" ]; then
CUBPATH=""
else
CUBPATH=$CUB_PATH
fi
# 5. Build machine info
# Identify MKL variant being used
if [ "$MATHLIB" = "mkl" -a "$MKL_THREADING" = "sequential" ]; then
MATHLIB=mkl-sequential
fi
# Build machine info
BUILDER=$USER
BUILDMACHINE=`hostname`
BUILDPATH=`pwd`
# 6. make buildinfo.h (only update if changed)
# Make buildinfo.h (only update if changed)
target=Source/CNTK/buildinfo.h
if [ ! -d Source ] ; then
if [ ! -d Source ] ; then
usage
fi

37
configure поставляемый
Просмотреть файл

@ -1,4 +1,11 @@
#!/bin/bash
#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
# ==============================================================================
#
configure=$0
build_top=$PWD
@ -12,9 +19,12 @@ have_acml=no
acml_path=
acml_check=include/acml.h
# CNTK Custom MKL Version
cntk_custom_mkl_version=1
have_mkl=no
mkl_path=
mkl_check=mkl/include/mkl.h
mkl_check=$cntk_custom_mkl_version/include/mkl.h
# Experimental OpenBLAS support.
have_openblas=no
@ -23,7 +33,7 @@ openblas_check=include/openblas_config.h
have_kaldi=no
kaldi_path=
kaldi_check=src/kaldi.mk
kaldi_check=src/kaldi.mk
have_buildtype=no
buildtype=
@ -58,11 +68,11 @@ default_use_code_coverage=no
enable_code_coverage=$default_use_code_coverage
# List from best to worst choice
default_path_list="/usr /usr/local /opt /opt/local /opt/intel"
default_path_list="/usr /usr/local /opt /opt/local"
# List from best to worst choice
default_acmls="acml5.3.1/ifort64_mp"
default_mkls=""
default_mkls="CNTKCustomMKL"
default_openblas=""
# NOTE: Will get compilation errors with cuda-6.0
@ -171,7 +181,7 @@ function is_hardlinked ()
echo $r
}
function default_use_cuda ()
function default_use_cuda ()
{
if test x$(find_cuda) = x || test x$(find_gdk) = x
then
@ -182,7 +192,7 @@ function default_use_cuda ()
}
enable_cuda=$(default_use_cuda)
function show_default ()
function show_default ()
{
if test x$1 = x
then
@ -207,6 +217,7 @@ function show_help ()
echo " --with-cudnn[=directory] $(show_default $(find_cudnn))"
echo " --with-acml[=directory] $(show_default $(find_acml))"
echo " --with-mkl[=directory] $(show_default $(find_mkl))"
echo " --with-mkl-sequential[=directory] $(show_default $(find_mkl))"
echo " --with-openblas[=directory] (experimental) $(show_default $(find_openblas))"
echo " --with-buildtype=(debug|release) $(show_default $default_buildtype)"
echo " --with-kaldi[=directory] $(show_default $(find_kaldi))"
@ -392,12 +403,16 @@ do
--with-mkl*)
have_mkl=yes
mathlib=mkl
mkl_threading=parallel
case $key in
--with-mkl-sequential*) mkl_threading=sequential ;;
esac
if test x$optarg = x
then
mkl_path=$(find_mkl)
if test x$mkl_path = x
then
echo "Cannot find mkl directory"
echo "Cannot find CNTK custom MKL directory"
echo "Please specify a value for --with-mkl"
exit 1
fi
@ -406,7 +421,7 @@ do
then
mkl_path=$optarg
else
echo "Invalid mkl directory $optarg"
echo "Invalid CNTK custom MKL directory $optarg"
exit 1
fi
fi
@ -534,7 +549,7 @@ then
if test x$mkl_path = x
then
echo "Cannot find a CPU math library."
echo "Please specify --with-acml, --with-mkl, --with-openblas with a path."
echo "Please specify --with-acml, --with-mkl, --with-mkl-sequential, --with-openblas with a path."
exit 1
else
mathlib=mkl
@ -637,6 +652,8 @@ case $mathlib in
;;
mkl)
echo MKL_PATH=$mkl_path >> $config
echo MKL_THREADING=$mkl_threading >> $config
echo CNTK_CUSTOM_MKL_VERSION=$cntk_custom_mkl_version >> $config
;;
openblas)
echo OPENBLAS_PATH=$openblas_path >> $config
@ -678,6 +695,6 @@ then
echo all clean : >> $makefile
printf '\t$(MAKE) -C $(dir) BUILD_TOP=$(BUILD_TOP) $@\n' >> $makefile
fi
echo run
echo run
echo '>make -j all'
echo to build