diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/.03_ResNet-parallel.cntk.swp b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/.03_ResNet-parallel.cntk.swp new file mode 100644 index 000000000..c10181011 Binary files /dev/null and b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/.03_ResNet-parallel.cntk.swp differ diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/03_ResNet-parallel.cntk b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/03_ResNet-parallel.cntk index 822b1d11a..e52d4c183 100644 --- a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/03_ResNet-parallel.cntk +++ b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/03_ResNet-parallel.cntk @@ -86,7 +86,7 @@ Train = [ reader = [ readerType = "ImageReader" - file = "$DataDir$/cifar-10-batches-py/train_map.txt" + file = "$DataDir$/train_map.txt" randomize = "auto" features = [ width = 32 @@ -96,7 +96,7 @@ Train = [ cropRatio = 0.8 jitterType = "uniRatio" interpolations = "linear" - meanFile = "$DataDir$/cifar-10-batches-py/CIFAR-10_mean.xml" + meanFile = "$DataDir$/CIFAR-10_mean.xml" ] labels = [ labelDim = 10 @@ -105,7 +105,7 @@ Train = [ cvReader = [ readerType = "ImageReader" - file = "$DataDir$/cifar-10-batches-py/test_map.txt" + file = "$DataDir$/test_map.txt" randomize = "none" features = [ width = 32 @@ -115,7 +115,7 @@ Train = [ cropRatio = 1 jitterType = "uniRatio" interpolations = "linear" - meanFile = "$DataDir$/cifar-10-batches-py/CIFAR-10_mean.xml" + meanFile = "$DataDir$/CIFAR-10_mean.xml" ] labels = [ labelDim = 10 diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/baseline.txt b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/baseline.txt new file mode 100644 index 000000000..e69de29bb diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test old mode 100644 new mode 100755 index 72aebc90f..29d70b855 --- a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test +++ b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test @@ -2,20 +2,21 @@ . $TEST_DIR/run-test-common +#dataDir="." ConfigDir=$TEST_DIR -LogFileName=stderr +LogFileName="ASGDMultiGPU" Instances=4 NumCPUThreads=$(threadsPerInstance $Instances) # cntkmpirun -cntkmpirun "-n $Instances" 03_ResNet-parallel.cntk "numCPUThreads=$NumCPUThreads precision=float SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]" +cntkmpirun "-n $Instances" 03_ResNet-parallel.cntk "numCPUThreads=$NumCPUThreads precision=float parallelTrain=true SimpleMultiGPU=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=32]]]]" ExitCode=$? -sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_ASGDMultiGPU.logrank0 -sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_ASGDMultiGPU.logrank1 -sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_ASGDMultiGPU.logrank2 -sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/"$LogFileName"_ASGDMultiGPU.logrank3 +sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_Train.logrank0 +sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_Train.logrank1 +sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_Train.logrank2 +sed 's/^/MPI Rank 3: /' $TEST_RUN_DIR/"$LogFileName"_Train.logrank3 # Delete the test data if copied -[[ "$Copied" -eq "1" ]] && rm -rf "$DataDir" +#[[ "$Copied" -eq "1" ]] && rm -rf "$DataDir" exit $ExitCode diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test-common b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test-common index 037491c1d..bc7f8b522 100644 --- a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test-common +++ b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/run-test-common @@ -4,6 +4,7 @@ ConfigDir=$TEST_DIR if [[ ! -d $TEST_DATA_DIR || ! -e $TEST_DATA_DIR/Train_cntk_text.txt || ! -e $TEST_DATA_DIR/train_map.txt ]]; then + echo "test_data_dir"; # Cannot find test data locally. # Try external test data directory (not part of the CNTK repository) as an alternative. if [[ -d "$CNTK_EXTERNAL_TESTDATA_SOURCE_DIRECTORY" ]]; then @@ -16,9 +17,9 @@ if [[ ! -d $TEST_DATA_DIR || ! -e $TEST_DATA_DIR/Train_cntk_text.txt || ! -e $TE # Copy the test data to the test run directory DataDir=$TEST_RUN_DIR/TestData mkdir $DataDir - mkdir $DataDir/cifar-10-batches-py + mkdir $DataDir/cifar-10-batches-py cp -R $DataSourceDir/*_cntk_text.txt $DataDir || exit $? - cp -R $DataSourceDir/cifar-10-batches-py/data.zip $DataDir/cifar-10-batches-py || exit $? + #cp -R $DataSourceDir/cifar-10-batches-py/data.zip $DataDir/cifar-10-batches-py || exit $? cp -R $DataSourceDir/cifar-10-batches-py/CIFAR-10_mean.xml $DataDir || exit $? cp -R $DataSourceDir/cifar-10-batches-py/*_map.txt $DataDir || exit $? Copied=1 @@ -26,4 +27,4 @@ if [[ ! -d $TEST_DATA_DIR || ! -e $TEST_DATA_DIR/Train_cntk_text.txt || ! -e $TE echo Error: cannot find data. Please see Examples/Image/DataSets/CIFAR10/README.md for instructions to get it. exit 1 fi -fi \ No newline at end of file +fi diff --git a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/testcases.yml b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/testcases.yml index 0d3178a26..887a10968 100644 --- a/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/testcases.yml +++ b/Tests/EndToEndTests/ParallelTraining/AsynchronizedSGD/testcases.yml @@ -1,4 +1,5 @@ -# dataDir: ../../Data +dataDir: . +#../../Data tags: # running on every BVT job in 'P' (Parallel) leg in Debug-GPU and Release-CPU configurations: