Fix two perf problems
1. On Linux, use ACML MP as default ACML library. 2. For the parallel end-to-end tests, limit concurrency based on available hardware threads.
This commit is contained in:
Родитель
d9a3558d03
Коммит
b1f0ad70ba
2
Makefile
2
Makefile
|
@ -111,7 +111,7 @@ endif
|
|||
ifeq ("$(MATHLIB)","acml")
|
||||
INCLUDEPATH += $(ACML_PATH)/include
|
||||
LIBPATH += $(ACML_PATH)/lib
|
||||
LIBS += -lacml -lm -lpthread
|
||||
LIBS += -lacml_mp -liomp5 -lm -lpthread
|
||||
CPPFLAGS += -DUSE_ACML
|
||||
endif
|
||||
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
ConfigDir=$TEST_DIR/../..
|
||||
LogFileName=stderr
|
||||
Instances=4
|
||||
NumCPUThreads=$(threadsPerInstance $Instances)
|
||||
|
||||
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
|
||||
cntkmpirun "-n 4" SimpleMultiGPU.config "precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]"
|
||||
cntkmpirun "-n $Instances" SimpleMultiGPU.config "numCPUThreads=$NumCPUThreads precision=double SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]]"
|
||||
ExitCode=$?
|
||||
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
|
||||
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
ConfigDir=$TEST_DIR/../..
|
||||
LogFileName=stderr
|
||||
Instances=4
|
||||
NumCPUThreads=$(threadsPerInstance $Instances)
|
||||
|
||||
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
|
||||
cntkmpirun "-n 4" SimpleMultiGPU.config "precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]"
|
||||
cntkmpirun "-n $Instances" SimpleMultiGPU.config "numCPUThreads=$NumCPUThreads precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]"
|
||||
ExitCode=$?
|
||||
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank0
|
||||
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_SimpleMultiGPU.logrank1
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
#!/bin/bash
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
. $TEST_ROOT_DIR/run-test-common
|
||||
|
||||
# cntkrun <CNTK config file name> <additional CNTK args>
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
ConfigDir=$TEST_DIR/..
|
||||
LogFileName=stderr
|
||||
Instances=3
|
||||
NumCPUThreads=$(threadsPerInstance $Instances)
|
||||
|
||||
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
|
||||
cntkmpirun "-n 3" cntk.config "precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]"
|
||||
cntkmpirun "-n $Instances" cntk.config "numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]"
|
||||
ExitCode=$?
|
||||
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
|
||||
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
|
||||
ConfigDir=$TEST_DIR/..
|
||||
LogFileName=stderr
|
||||
Instances=3
|
||||
NumCPUThreads=$(threadsPerInstance $Instances)
|
||||
|
||||
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
|
||||
cntkmpirun "-n 3" cntk.config
|
||||
cntkmpirun "-n $Instances" cntk.config "numCPUThreads=$NumCPUThreads"
|
||||
ExitCode=$?
|
||||
sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0
|
||||
sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1
|
||||
|
|
|
@ -88,6 +88,14 @@ cntkrun()
|
|||
return $?
|
||||
}
|
||||
|
||||
# Compute how many hardware threads each of N concurrently running instances
# may use, so that the instances together do not oversubscribe the machine.
#   $1 - number of instances that will run in parallel
# Prints the per-instance thread count on stdout; the result is always >= 1.
# A missing, non-numeric or zero instance count, or an unavailable `nproc`,
# yields 1 instead of an arithmetic error and empty output.
threadsPerInstance()
{
  local instances=$1
  local available
  local threads

  # Reject anything that is not a positive decimal integer before it reaches
  # arithmetic expansion: an empty or zero divisor would abort the expansion.
  # The 10# prefix keeps values with leading zeros from being read as octal.
  if [[ ! $instances =~ ^[0-9]+$ ]] || (( 10#$instances == 0 )); then
    echo 1
    return 0
  fi

  # Fall back to a single thread if nproc is missing or prints something that
  # is not a plain number.
  available=$(nproc 2>/dev/null)
  [[ $available =~ ^[0-9]+$ ]] || available=1

  threads=$(( available / 10#$instances ))

  # Integer division yields 0 when instances outnumber the available threads;
  # every instance still needs at least one.
  (( threads > 0 )) || threads=1
  echo "$threads"
}
|
||||
|
||||
# Function for launching a parallel CNTK run with MPI
|
||||
# cntkmpirun <MPI args> <CNTK config file name> <additional CNTK args>
|
||||
cntkmpirun()
|
||||
|
|
|
@ -41,7 +41,7 @@ mathlib=
|
|||
default_path_list="/usr /usr/local /opt /opt/local"
|
||||
|
||||
# List from best to worst choice
|
||||
default_acmls="acml5.3.1/ifort64"
|
||||
default_acmls="acml5.3.1/ifort64_mp"
|
||||
default_mkls=""
|
||||
|
||||
# NOTE: Will get compilation errors with cuda-6.0
|
||||
|
|
Загрузка…
Ссылка в новой задаче