diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/cntkcv.cntk b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/cntkcv.cntk index a648558e3..f8473b3e2 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/cntkcv.cntk +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/cntkcv.cntk @@ -78,7 +78,7 @@ speechTrain = [ ] ] reader = [ - readerType = "HTKMLFReader" + readerType = "HTKDeserializers" readMethod = "blockRandomize" miniBatchMode = "partial" randomize = "auto" @@ -99,7 +99,7 @@ speechTrain = [ ] ] cvreader = [ - readerType = "HTKMLFReader" + readerType = "HTKDeserializers" readMethod = "blockRandomize" miniBatchMode = "partial" randomize = "auto" diff --git a/Tests/EndToEndTests/Speech/DNN/cntk.cntk b/Tests/EndToEndTests/Speech/DNN/cntk.cntk index 325e68c90..5c3e20482 100644 --- a/Tests/EndToEndTests/Speech/DNN/cntk.cntk +++ b/Tests/EndToEndTests/Speech/DNN/cntk.cntk @@ -78,7 +78,7 @@ speechTrain = [ ] ] reader = [ - readerType = "HTKMLFReader" + readerType = "HTKDeserializers" readMethod = "blockRandomize" miniBatchMode = "partial" randomize = "auto" diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt deleted file mode 100644 index a799c7c45..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt +++ /dev/null @@ -1,438 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3530 @ 2.80GHz - Hardware threads: 4 - Total Memory: 12580404 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:27 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (1) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:27 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (2) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:28 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (0) are in (participating) -MPI Rank 0: 12/15/2016 08:43:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:28 -MPI Rank 0: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -MPI Rank 0: 12/15/2016 08:43:28: Using 1 CPU threads. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: ############################################################################## -MPI Rank 0: 12/15/2016 08:43:28: # # -MPI Rank 0: 12/15/2016 08:43:28: # speechTrain command (train action) # -MPI Rank 0: 12/15/2016 08:43:28: # # -MPI Rank 0: 12/15/2016 08:43:28: ############################################################################## -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: -MPI Rank 0: Creating virgin network. -MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: 12/15/2016 08:43:28: -MPI Rank 0: Model has 25 nodes. Using CPU. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: 12/15/2016 08:43:28: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:43:28: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:43:28: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 12/15/2016 08:43:28: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 12/15/2016 08:43:28: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 12/15/2016 08:43:28: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:28: MeanOfFeatures = Mean() -MPI Rank 0: 12/15/2016 08:43:28: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 12/15/2016 08:43:28: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:31: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:32: Starting minibatch loop. -MPI Rank 0: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2935s; samplesPerSecond = 2180.9 -MPI Rank 0: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2502s; samplesPerSecond = 2557.8 -MPI Rank 0: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2488s; samplesPerSecond = 2572.3 -MPI Rank 0: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2510s; samplesPerSecond = 2550.0 -MPI Rank 0: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2515s; samplesPerSecond = 2544.9 -MPI Rank 0: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2520s; samplesPerSecond = 2539.6 -MPI Rank 0: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2508s; samplesPerSecond = 2552.3 -MPI Rank 0: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2526s; samplesPerSecond = 2533.3 -MPI Rank 0: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2518s; samplesPerSecond = 2541.2 -MPI Rank 0: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2526s; samplesPerSecond = 2534.1 -MPI Rank 0: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2518s; samplesPerSecond = 2541.8 -MPI Rank 0: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2514s; samplesPerSecond = 2545.8 -MPI Rank 0: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2504s; samplesPerSecond = 2556.4 -MPI Rank 0: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2733s; samplesPerSecond = 2341.6 -MPI Rank 0: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2691s; samplesPerSecond = 2378.0 -MPI Rank 0: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2697s; samplesPerSecond = 2372.6 -MPI Rank 0: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2651s; samplesPerSecond = 2414.3 -MPI Rank 0: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2580s; samplesPerSecond = 2480.4 -MPI Rank 0: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2583s; samplesPerSecond = 2477.8 -MPI Rank 0: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2602s; samplesPerSecond = 2459.3 -MPI Rank 0: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2686s; samplesPerSecond = 2382.6 -MPI Rank 0: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2696s; samplesPerSecond = 2374.2 -MPI Rank 0: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2573s; samplesPerSecond = 2487.5 -MPI Rank 0: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2532s; samplesPerSecond = 2528.1 -MPI Rank 0: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2533s; samplesPerSecond = 2526.8 -MPI Rank 0: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2547s; samplesPerSecond = 2512.3 -MPI Rank 0: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2627s; samplesPerSecond = 2436.6 -MPI Rank 0: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2555s; samplesPerSecond = 2504.4 -MPI Rank 0: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2635s; samplesPerSecond = 2428.4 -MPI Rank 0: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2645s; samplesPerSecond = 2419.2 -MPI Rank 0: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2686s; samplesPerSecond = 2382.5 -MPI Rank 0: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2491s; samplesPerSecond = 2568.9 -MPI Rank 0: 12/15/2016 08:43:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.29563s -MPI Rank 0: 12/15/2016 08:43:40: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:40: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.20280589 * 2560; EvalClassificationError = 0.60234375 * 2560; time = 0.5844s; samplesPerSecond = 4380.2 -MPI Rank 0: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16401892 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.5734s; samplesPerSecond = 4464.6 -MPI Rank 0: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.10520889 * 2560; EvalClassificationError = 0.56640625 * 2560; time = 0.5672s; samplesPerSecond = 4513.7 -MPI Rank 0: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.07595031 * 2560; EvalClassificationError = 0.56875000 * 2560; time = 0.5632s; samplesPerSecond = 4545.8 -MPI Rank 0: 12/15/2016 08:43:43: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.09291426 * 2560; EvalClassificationError = 0.57148438 * 2560; time = 0.5727s; samplesPerSecond = 4470.1 -MPI Rank 0: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.02267717 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.5532s; samplesPerSecond = 4627.9 -MPI Rank 0: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00026902 * 2560; EvalClassificationError = 0.54492188 * 2560; time = 0.5579s; samplesPerSecond = 4588.9 -MPI Rank 0: 12/15/2016 08:43:45: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.00974791 * 2560; EvalClassificationError = 0.55820313 * 2560; time = 0.5667s; samplesPerSecond = 4517.4 -MPI Rank 0: 12/15/2016 08:43:45: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.08419905 * 20480; EvalClassificationError = 0.56723633 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.56497s -MPI Rank 0: 12/15/2016 08:43:45: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:45: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:43:46: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96836606 * 10240; EvalClassificationError = 0.53740234 * 10240; time = 1.4889s; samplesPerSecond = 6877.4 -MPI Rank 0: 12/15/2016 08:43:48: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.98822385 * 10240; EvalClassificationError = 0.55458984 * 10240; time = 1.4599s; samplesPerSecond = 7014.1 -MPI Rank 0: 12/15/2016 08:43:48: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97829495 * 20480; EvalClassificationError = 0.54599609 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.975s -MPI Rank 0: 12/15/2016 08:43:48: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:48: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:43:48: __COMPLETED__ -MPI Rank 1: 12/15/2016 08:43:28: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:27 -MPI Rank 1: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -MPI Rank 1: 12/15/2016 08:43:28: Using 1 CPU threads. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: ############################################################################## -MPI Rank 1: 12/15/2016 08:43:28: # # -MPI Rank 1: 12/15/2016 08:43:28: # speechTrain command (train action) # -MPI Rank 1: 12/15/2016 08:43:28: # # -MPI Rank 1: 12/15/2016 08:43:28: ############################################################################## -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: -MPI Rank 1: Creating virgin network. -MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: 12/15/2016 08:43:28: -MPI Rank 1: Model has 25 nodes. Using CPU. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: 12/15/2016 08:43:28: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:43:28: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:43:28: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 12/15/2016 08:43:28: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 12/15/2016 08:43:28: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 12/15/2016 08:43:28: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:28: MeanOfFeatures = Mean() -MPI Rank 1: 12/15/2016 08:43:28: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 12/15/2016 08:43:28: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:31: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:32: Starting minibatch loop. -MPI Rank 1: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2766s; samplesPerSecond = 2314.2 -MPI Rank 1: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2508s; samplesPerSecond = 2552.1 -MPI Rank 1: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2515s; samplesPerSecond = 2545.1 -MPI Rank 1: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2519s; samplesPerSecond = 2541.0 -MPI Rank 1: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2515s; samplesPerSecond = 2544.3 -MPI Rank 1: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2520s; samplesPerSecond = 2539.8 -MPI Rank 1: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2483s; samplesPerSecond = 2577.4 -MPI Rank 1: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2529s; samplesPerSecond = 2531.1 -MPI Rank 1: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2534s; samplesPerSecond = 2525.4 -MPI Rank 1: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2519s; samplesPerSecond = 2541.0 -MPI Rank 1: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2508s; samplesPerSecond = 2551.7 -MPI Rank 1: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2587s; samplesPerSecond = 2474.2 -MPI Rank 1: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2536s; samplesPerSecond = 2523.3 -MPI Rank 1: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2777s; samplesPerSecond = 2304.7 -MPI Rank 1: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2543s; samplesPerSecond = 2516.6 -MPI Rank 1: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2608s; samplesPerSecond = 2454.2 -MPI Rank 1: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2635s; samplesPerSecond = 2429.0 -MPI Rank 1: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2577s; samplesPerSecond = 2483.7 -MPI Rank 1: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2585s; samplesPerSecond = 2475.9 -MPI Rank 1: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2595s; samplesPerSecond = 2466.8 -MPI Rank 1: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2588s; samplesPerSecond = 2472.5 -MPI Rank 1: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2557s; samplesPerSecond = 2503.4 -MPI Rank 1: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2604s; samplesPerSecond = 2457.8 -MPI Rank 1: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2561s; samplesPerSecond = 2499.3 -MPI Rank 1: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2628s; samplesPerSecond = 2435.0 -MPI Rank 1: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2638s; samplesPerSecond = 2426.1 -MPI Rank 1: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2621s; samplesPerSecond = 2441.8 -MPI Rank 1: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2566s; samplesPerSecond = 2494.5 -MPI Rank 1: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2629s; samplesPerSecond = 2434.2 -MPI Rank 1: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2545s; samplesPerSecond = 2514.8 -MPI Rank 1: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2542s; samplesPerSecond = 2518.1 -MPI Rank 1: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2584s; samplesPerSecond = 2477.0 -MPI Rank 1: 12/15/2016 08:43:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.255s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:40: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.20280589 * 2560; EvalClassificationError = 0.60234375 * 2560; time = 0.5976s; samplesPerSecond = 4283.8 -MPI Rank 1: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16401892 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.5734s; samplesPerSecond = 4464.3 -MPI Rank 1: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.10520889 * 2560; EvalClassificationError = 0.56640625 * 2560; time = 0.5670s; samplesPerSecond = 4514.6 -MPI Rank 1: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.07595031 * 2560; EvalClassificationError = 0.56875000 * 2560; time = 0.5633s; samplesPerSecond = 4544.9 -MPI Rank 1: 12/15/2016 08:43:43: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.09291426 * 2560; EvalClassificationError = 0.57148438 * 2560; time = 0.5726s; samplesPerSecond = 4471.1 -MPI Rank 1: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.02267717 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.5532s; samplesPerSecond = 4627.9 -MPI Rank 1: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00026902 * 2560; EvalClassificationError = 0.54492188 * 2560; time = 0.5580s; samplesPerSecond = 4587.5 -MPI Rank 1: 12/15/2016 08:43:45: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.00974791 * 2560; EvalClassificationError = 0.55820313 * 2560; time = 0.5667s; samplesPerSecond = 4517.5 -MPI Rank 1: 12/15/2016 08:43:45: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.08419905 * 20480; EvalClassificationError = 0.56723633 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.57669s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:45: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:43:46: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96836606 * 10240; EvalClassificationError = 0.53740234 * 10240; time = 1.4927s; samplesPerSecond = 6859.8 -MPI Rank 1: 12/15/2016 08:43:48: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.98822385 * 10240; EvalClassificationError = 0.55458984 * 10240; time = 1.4597s; samplesPerSecond = 7015.3 -MPI Rank 1: 12/15/2016 08:43:48: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97829495 * 20480; EvalClassificationError = 0.54599609 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.97679s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:48: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:43:48: __COMPLETED__ -MPI Rank 2: 12/15/2016 08:43:29: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:43:27 -MPI Rank 2: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_Parallel1BitQuantization@release_cpu/stderr -MPI Rank 2: 12/15/2016 08:43:29: Using 1 CPU threads. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: ############################################################################## -MPI Rank 2: 12/15/2016 08:43:29: # # -MPI Rank 2: 12/15/2016 08:43:29: # speechTrain command (train action) # -MPI Rank 2: 12/15/2016 08:43:29: # # -MPI Rank 2: 12/15/2016 08:43:29: ############################################################################## -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: -MPI Rank 2: Creating virgin network. -MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: 12/15/2016 08:43:29: -MPI Rank 2: Model has 25 nodes. Using CPU. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: 12/15/2016 08:43:29: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:43:29: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:43:29: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 12/15/2016 08:43:29: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 12/15/2016 08:43:29: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 12/15/2016 08:43:29: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:29: MeanOfFeatures = Mean() -MPI Rank 2: 12/15/2016 08:43:29: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 12/15/2016 08:43:29: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:32: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:32: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:32: Starting minibatch loop. -MPI Rank 2: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2979s; samplesPerSecond = 2148.6 -MPI Rank 2: 12/15/2016 08:43:32: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2500s; samplesPerSecond = 2559.5 -MPI Rank 2: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2493s; samplesPerSecond = 2567.7 -MPI Rank 2: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2524s; samplesPerSecond = 2535.9 -MPI Rank 2: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2519s; samplesPerSecond = 2540.5 -MPI Rank 2: 12/15/2016 08:43:33: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2524s; samplesPerSecond = 2535.7 -MPI Rank 2: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2509s; samplesPerSecond = 2551.1 -MPI Rank 2: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2536s; samplesPerSecond = 2523.7 -MPI Rank 2: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2564s; samplesPerSecond = 2496.2 -MPI Rank 2: 12/15/2016 08:43:34: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2523s; samplesPerSecond = 2536.5 -MPI Rank 2: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2508s; samplesPerSecond = 2551.4 -MPI Rank 2: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2539s; samplesPerSecond = 2520.8 -MPI Rank 2: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2521s; samplesPerSecond = 2538.6 -MPI Rank 2: 12/15/2016 08:43:35: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2586s; samplesPerSecond = 2474.7 -MPI Rank 2: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2692s; samplesPerSecond = 2377.6 -MPI Rank 2: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2642s; samplesPerSecond = 2422.4 -MPI Rank 2: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2559s; samplesPerSecond = 2500.5 -MPI Rank 2: 12/15/2016 08:43:36: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2514s; samplesPerSecond = 2545.5 -MPI Rank 2: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2543s; samplesPerSecond = 2516.6 -MPI Rank 2: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2587s; samplesPerSecond = 2474.3 -MPI Rank 2: 12/15/2016 08:43:37: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2696s; samplesPerSecond = 2374.1 -MPI Rank 2: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2697s; samplesPerSecond = 2373.3 -MPI Rank 2: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2606s; samplesPerSecond = 2455.6 -MPI Rank 2: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2574s; samplesPerSecond = 2486.1 -MPI Rank 2: 12/15/2016 08:43:38: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2608s; samplesPerSecond = 2454.2 -MPI Rank 2: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2607s; samplesPerSecond = 2455.2 -MPI Rank 2: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2525s; samplesPerSecond = 2534.4 -MPI Rank 2: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2501s; samplesPerSecond = 2559.3 -MPI Rank 2: 12/15/2016 08:43:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2584s; samplesPerSecond = 2476.8 -MPI Rank 2: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2644s; samplesPerSecond = 2420.6 -MPI Rank 2: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2678s; samplesPerSecond = 2390.1 -MPI Rank 2: 12/15/2016 08:43:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2574s; samplesPerSecond = 2486.5 -MPI Rank 2: 12/15/2016 08:43:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.27847s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:40: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.20280589 * 2560; EvalClassificationError = 0.60234375 * 2560; time = 0.5974s; samplesPerSecond = 4285.5 -MPI Rank 2: 12/15/2016 08:43:41: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.16401892 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.5734s; samplesPerSecond = 4464.3 -MPI Rank 2: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.10520889 * 2560; EvalClassificationError = 0.56640625 * 2560; time = 0.5671s; samplesPerSecond = 4514.4 -MPI Rank 2: 12/15/2016 08:43:42: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.07595031 * 2560; EvalClassificationError = 0.56875000 * 2560; time = 0.5631s; samplesPerSecond = 4546.0 -MPI Rank 2: 12/15/2016 08:43:43: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.09291426 * 2560; EvalClassificationError = 0.57148438 * 2560; time = 0.5726s; samplesPerSecond = 4471.0 -MPI Rank 2: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.02267717 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.5530s; samplesPerSecond = 4629.2 -MPI Rank 2: 12/15/2016 08:43:44: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.00026902 * 2560; EvalClassificationError = 0.54492188 * 2560; time = 0.5580s; samplesPerSecond = 4587.5 -MPI Rank 2: 12/15/2016 08:43:45: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.00974791 * 2560; EvalClassificationError = 0.55820313 * 2560; time = 0.5666s; samplesPerSecond = 4518.0 -MPI Rank 2: 12/15/2016 08:43:45: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.08419905 * 20480; EvalClassificationError = 0.56723633 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.5763s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:45: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:43:46: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96836606 * 10240; EvalClassificationError = 0.53740234 * 10240; time = 1.4771s; samplesPerSecond = 6932.6 -MPI Rank 2: 12/15/2016 08:43:48: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.98822385 * 10240; EvalClassificationError = 0.55458984 * 10240; time = 1.4597s; samplesPerSecond = 7015.3 -MPI Rank 2: 12/15/2016 08:43:48: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97829495 * 20480; EvalClassificationError = 0.54599609 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.96064s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:48: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:43:48: __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt deleted file mode 100644 index c4a3616a0..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt +++ /dev/null @@ -1,1749 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz - Hardware threads: 24 - Total Memory: 268381192 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 -mpihelper: we are cog 1 in a gearbox of 3 -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -MPI Rank 0: 08/16/2016 03:19:18: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 08/16/2016 03:19:18: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:19:18: Build info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:18: Built time: Aug 16 2016 03:09:16 -MPI Rank 0: 08/16/2016 03:19:18: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 0: 08/16/2016 03:19:18: Build type: Release -MPI Rank 0: 08/16/2016 03:19:18: Build target: GPU -MPI Rank 0: 08/16/2016 03:19:18: With 1bit-SGD: yes -MPI Rank 0: 08/16/2016 03:19:18: Math lib: mkl -MPI Rank 0: 08/16/2016 03:19:18: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 08/16/2016 03:19:18: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 08/16/2016 03:19:18: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 08/16/2016 03:19:18: Build Branch: HEAD -MPI Rank 0: 08/16/2016 03:19:18: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 0: 08/16/2016 03:19:18: Built by svcphil on Philly-Pool1 -MPI Rank 0: 08/16/2016 03:19:18: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 08/16/2016 03:19:18: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:19:20: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:19:20: GPU info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:19:20: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:19:20: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:19:20: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:19:20: ------------------------------------------------------------------- -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: Running on DPHAIM-24 at 2016/08/16 03:19:20 -MPI Rank 0: 08/16/2016 03:19:20: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:19:20: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "$DataDir$/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=8 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:19:20: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = 0 -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=8 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:deviceId=0 -MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 08/16/2016 03:19:20: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 08/16/2016 03:19:20: Commands: speechTrain -MPI Rank 0: 08/16/2016 03:19:20: Precision = "double" -MPI Rank 0: 08/16/2016 03:19:20: Using 8 CPU threads. -MPI Rank 0: 08/16/2016 03:19:20: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 08/16/2016 03:19:20: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 0: 08/16/2016 03:19:20: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: ############################################################################## -MPI Rank 0: 08/16/2016 03:19:20: # # -MPI Rank 0: 08/16/2016 03:19:20: # Action "train" # -MPI Rank 0: 08/16/2016 03:19:20: # # -MPI Rank 0: 08/16/2016 03:19:20: ############################################################################## -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:20: CNTKCommandTrainBegin: speechTrain -MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:21: Creating virgin network. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: -MPI Rank 0: Post-processing network... -MPI Rank 0: -MPI Rank 0: 7 roots: -MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 0: EvalClassificationError = ClassificationError() -MPI Rank 0: InvStdOfFeatures = InvStdDev() -MPI Rank 0: MeanOfFeatures = Mean() -MPI Rank 0: PosteriorProb = Softmax() -MPI Rank 0: Prior = Mean() -MPI Rank 0: ScaledLogLikelihood = Minus() -MPI Rank 0: -MPI Rank 0: Validating network. 25 nodes to process in pass 1. -MPI Rank 0: -MPI Rank 0: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 0: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 0: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 0: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 0: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 0: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 0: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 0: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 0: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 0: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 0: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 0: -MPI Rank 0: Validating network. 17 nodes to process in pass 2. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Validating network, final pass. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 0: -MPI Rank 0: Post-processing network complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Created model with 25 nodes on GPU 0. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Training criterion node(s): -MPI Rank 0: 08/16/2016 03:19:22: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Evaluation criterion node(s): -MPI Rank 0: 08/16/2016 03:19:22: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:19:22: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:19:22: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 08/16/2016 03:19:22: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 08/16/2016 03:19:22: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 08/16/2016 03:19:22: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:22: MeanOfFeatures = Mean() -MPI Rank 0: 08/16/2016 03:19:22: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 08/16/2016 03:19:22: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:27: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:28: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:28: Starting minibatch loop. -MPI Rank 0: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.0934s; samplesPerSecond = 6851.1 -MPI Rank 0: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.0945s; samplesPerSecond = 6775.2 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1008s; samplesPerSecond = 6351.3 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0851s; samplesPerSecond = 7522.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.1101s; samplesPerSecond = 5814.1 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.0879s; samplesPerSecond = 7280.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.0876s; samplesPerSecond = 7306.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1010s; samplesPerSecond = 6338.0 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0915s; samplesPerSecond = 6997.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1070s; samplesPerSecond = 5978.9 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.0879s; samplesPerSecond = 7277.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.0881s; samplesPerSecond = 7264.4 -MPI Rank 0: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.0948s; samplesPerSecond = 6750.9 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.0913s; samplesPerSecond = 7013.3 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0928s; samplesPerSecond = 6897.2 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1049s; samplesPerSecond = 6100.2 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.0865s; samplesPerSecond = 7398.0 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0889s; samplesPerSecond = 7202.8 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.0971s; samplesPerSecond = 6594.3 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.0983s; samplesPerSecond = 6511.5 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1010s; samplesPerSecond = 6339.3 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0944s; samplesPerSecond = 6782.8 -MPI Rank 0: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1063s; samplesPerSecond = 6018.7 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1045s; samplesPerSecond = 6124.6 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0902s; samplesPerSecond = 7096.6 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1010s; samplesPerSecond = 6336.6 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0913s; samplesPerSecond = 7013.5 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.0944s; samplesPerSecond = 6776.6 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.0897s; samplesPerSecond = 7136.0 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.0977s; samplesPerSecond = 6553.1 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.1008s; samplesPerSecond = 6348.1 -MPI Rank 0: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0993s; samplesPerSecond = 6443.8 -MPI Rank 0: 08/16/2016 03:19:31: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.07547s -MPI Rank 0: 08/16/2016 03:19:31: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2084s; samplesPerSecond = 12286.1 -MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1804s; samplesPerSecond = 14191.5 -MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1704s; samplesPerSecond = 15022.6 -MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.06745115 * 2560; EvalClassificationError = 0.56171875 * 2560; time = 0.1680s; samplesPerSecond = 15242.1 -MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.06705242 * 2560; EvalClassificationError = 0.55976563 * 2560; time = 0.1697s; samplesPerSecond = 15081.4 -MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.00136482 * 2560; EvalClassificationError = 0.54531250 * 2560; time = 0.1754s; samplesPerSecond = 14597.3 -MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.99508116 * 2560; EvalClassificationError = 0.54765625 * 2560; time = 0.1695s; samplesPerSecond = 15105.3 -MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.99964996 * 2560; EvalClassificationError = 0.55507812 * 2560; time = 0.1716s; samplesPerSecond = 14922.7 -MPI Rank 0: 08/16/2016 03:19:33: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.07215304 * 20480; EvalClassificationError = 0.56293945 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42511s -MPI Rank 0: 08/16/2016 03:19:33: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3564s; samplesPerSecond = 28732.2 -MPI Rank 0: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31728.4 -MPI Rank 0: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689726s -MPI Rank 0: 08/16/2016 03:19:34: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 08/16/2016 03:19:34: CNTKCommandTrainEnd: speechTrain -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:34: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:34: __COMPLETED__ -MPI Rank 0: ~MPIWrapper -MPI Rank 1: 08/16/2016 03:19:18: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 08/16/2016 03:19:18: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:19:18: Build info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:18: Built time: Aug 16 2016 03:09:16 -MPI Rank 1: 08/16/2016 03:19:18: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 1: 08/16/2016 03:19:18: Build type: Release -MPI Rank 1: 08/16/2016 03:19:18: Build target: GPU -MPI Rank 1: 08/16/2016 03:19:18: With 1bit-SGD: yes -MPI Rank 1: 08/16/2016 03:19:18: Math lib: mkl -MPI Rank 1: 08/16/2016 03:19:18: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 08/16/2016 03:19:18: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 08/16/2016 03:19:18: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 08/16/2016 03:19:18: Build Branch: HEAD -MPI Rank 1: 08/16/2016 03:19:18: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 1: 08/16/2016 03:19:18: Built by svcphil on Philly-Pool1 -MPI Rank 1: 08/16/2016 03:19:18: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 08/16/2016 03:19:18: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:19:21: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:19:21: GPU info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:19:21: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:19:21: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:19:21: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:19:21: ------------------------------------------------------------------- -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: Running on DPHAIM-24 at 2016/08/16 03:19:21 -MPI Rank 1: 08/16/2016 03:19:21: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:19:21: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "$DataDir$/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=8 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:19:21: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = 0 -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=8 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:deviceId=0 -MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 08/16/2016 03:19:21: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 08/16/2016 03:19:21: Commands: speechTrain -MPI Rank 1: 08/16/2016 03:19:21: Precision = "double" -MPI Rank 1: 08/16/2016 03:19:21: Using 8 CPU threads. -MPI Rank 1: 08/16/2016 03:19:21: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 08/16/2016 03:19:21: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 1: 08/16/2016 03:19:21: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: ############################################################################## -MPI Rank 1: 08/16/2016 03:19:21: # # -MPI Rank 1: 08/16/2016 03:19:21: # Action "train" # -MPI Rank 1: 08/16/2016 03:19:21: # # -MPI Rank 1: 08/16/2016 03:19:21: ############################################################################## -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:21: CNTKCommandTrainBegin: speechTrain -MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:22: Creating virgin network. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: -MPI Rank 1: Post-processing network... -MPI Rank 1: -MPI Rank 1: 7 roots: -MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 1: EvalClassificationError = ClassificationError() -MPI Rank 1: InvStdOfFeatures = InvStdDev() -MPI Rank 1: MeanOfFeatures = Mean() -MPI Rank 1: PosteriorProb = Softmax() -MPI Rank 1: Prior = Mean() -MPI Rank 1: ScaledLogLikelihood = Minus() -MPI Rank 1: -MPI Rank 1: Validating network. 25 nodes to process in pass 1. -MPI Rank 1: -MPI Rank 1: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 1: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 1: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 1: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 1: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 1: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 1: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 1: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 1: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 1: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 1: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 1: -MPI Rank 1: Validating network. 17 nodes to process in pass 2. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Validating network, final pass. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 1: -MPI Rank 1: Post-processing network complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Created model with 25 nodes on GPU 0. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Training criterion node(s): -MPI Rank 1: 08/16/2016 03:19:23: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Evaluation criterion node(s): -MPI Rank 1: 08/16/2016 03:19:23: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:19:23: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:19:23: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 08/16/2016 03:19:23: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 08/16/2016 03:19:23: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 08/16/2016 03:19:23: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:23: MeanOfFeatures = Mean() -MPI Rank 1: 08/16/2016 03:19:23: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 08/16/2016 03:19:23: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:28: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:28: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:28: Starting minibatch loop. -MPI Rank 1: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1401s; samplesPerSecond = 4569.0 -MPI Rank 1: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.0940s; samplesPerSecond = 6807.0 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1207s; samplesPerSecond = 5302.3 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0910s; samplesPerSecond = 7036.1 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.1037s; samplesPerSecond = 6172.4 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.0947s; samplesPerSecond = 6758.8 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.0943s; samplesPerSecond = 6788.4 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1106s; samplesPerSecond = 5784.9 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0876s; samplesPerSecond = 7307.6 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1082s; samplesPerSecond = 5917.2 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.1046s; samplesPerSecond = 6117.8 -MPI Rank 1: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1133s; samplesPerSecond = 5647.6 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.0890s; samplesPerSecond = 7191.7 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.1012s; samplesPerSecond = 6323.4 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.1044s; samplesPerSecond = 6132.4 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1080s; samplesPerSecond = 5928.1 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.1107s; samplesPerSecond = 5782.9 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.1113s; samplesPerSecond = 5752.3 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.1110s; samplesPerSecond = 5764.9 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.1045s; samplesPerSecond = 6122.8 -MPI Rank 1: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.0979s; samplesPerSecond = 6535.5 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1039s; samplesPerSecond = 6161.4 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0918s; samplesPerSecond = 6972.7 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0950s; samplesPerSecond = 6734.1 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1074s; samplesPerSecond = 5959.5 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.0968s; samplesPerSecond = 6610.5 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.1041s; samplesPerSecond = 6148.1 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1035s; samplesPerSecond = 6185.8 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.0917s; samplesPerSecond = 6977.2 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.0773s; samplesPerSecond = 8284.0 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0373s; samplesPerSecond = 17172.9 -MPI Rank 1: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0373s; samplesPerSecond = 17176.1 -MPI Rank 1: 08/16/2016 03:19:31: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.16555s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2063s; samplesPerSecond = 12411.0 -MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1808s; samplesPerSecond = 14158.0 -MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15049.2 -MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.06745115 * 2560; EvalClassificationError = 0.56171875 * 2560; time = 0.1683s; samplesPerSecond = 15215.0 -MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.06705242 * 2560; EvalClassificationError = 0.55976563 * 2560; time = 0.1697s; samplesPerSecond = 15086.2 -MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.00136482 * 2560; EvalClassificationError = 0.54531250 * 2560; time = 0.1754s; samplesPerSecond = 14598.1 -MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.99508116 * 2560; EvalClassificationError = 0.54765625 * 2560; time = 0.1692s; samplesPerSecond = 15131.5 -MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.99964996 * 2560; EvalClassificationError = 0.55507812 * 2560; time = 0.1718s; samplesPerSecond = 14902.8 -MPI Rank 1: 08/16/2016 03:19:33: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.07215304 * 20480; EvalClassificationError = 0.56293945 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42565s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3534s; samplesPerSecond = 28972.3 -MPI Rank 1: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31731.1 -MPI Rank 1: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.690282s -MPI Rank 1: 08/16/2016 03:19:34: CNTKCommandTrainEnd: speechTrain -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:34: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:34: __COMPLETED__ -MPI Rank 1: ~MPIWrapper -MPI Rank 2: 08/16/2016 03:19:19: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 08/16/2016 03:19:19: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:19:19: Build info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:19: Built time: Aug 16 2016 03:09:16 -MPI Rank 2: 08/16/2016 03:19:19: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 2: 08/16/2016 03:19:19: Build type: Release -MPI Rank 2: 08/16/2016 03:19:19: Build target: GPU -MPI Rank 2: 08/16/2016 03:19:19: With 1bit-SGD: yes -MPI Rank 2: 08/16/2016 03:19:19: Math lib: mkl -MPI Rank 2: 08/16/2016 03:19:19: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 08/16/2016 03:19:19: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 08/16/2016 03:19:19: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 08/16/2016 03:19:19: Build Branch: HEAD -MPI Rank 2: 08/16/2016 03:19:19: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 2: 08/16/2016 03:19:19: Built by svcphil on Philly-Pool1 -MPI Rank 2: 08/16/2016 03:19:19: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 08/16/2016 03:19:19: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:19:22: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:19:22: GPU info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:19:22: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:19:22: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:19:22: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:19:22: ------------------------------------------------------------------- -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: Running on DPHAIM-24 at 2016/08/16 03:19:22 -MPI Rank 2: 08/16/2016 03:19:22: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:19:22: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "$DataDir$/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=8 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:19:22: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = 0 -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=8 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:deviceId=0 -MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu -MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/stderr -MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 08/16/2016 03:19:22: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 08/16/2016 03:19:22: Commands: speechTrain -MPI Rank 2: 08/16/2016 03:19:22: Precision = "double" -MPI Rank 2: 08/16/2016 03:19:22: Using 8 CPU threads. -MPI Rank 2: 08/16/2016 03:19:22: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_Parallel1BitQuantization@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 08/16/2016 03:19:22: CNTKCommandTrainInfo: speechTrain : 3 -MPI Rank 2: 08/16/2016 03:19:22: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 3 -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: ############################################################################## -MPI Rank 2: 08/16/2016 03:19:22: # # -MPI Rank 2: 08/16/2016 03:19:22: # Action "train" # -MPI Rank 2: 08/16/2016 03:19:22: # # -MPI Rank 2: 08/16/2016 03:19:22: ############################################################################## -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:22: CNTKCommandTrainBegin: speechTrain -MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Creating virgin network. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: -MPI Rank 2: Post-processing network... -MPI Rank 2: -MPI Rank 2: 7 roots: -MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 2: EvalClassificationError = ClassificationError() -MPI Rank 2: InvStdOfFeatures = InvStdDev() -MPI Rank 2: MeanOfFeatures = Mean() -MPI Rank 2: PosteriorProb = Softmax() -MPI Rank 2: Prior = Mean() -MPI Rank 2: ScaledLogLikelihood = Minus() -MPI Rank 2: -MPI Rank 2: Validating network. 25 nodes to process in pass 1. -MPI Rank 2: -MPI Rank 2: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 2: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 2: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 2: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 2: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 2: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 2: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 2: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 2: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 2: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 2: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 2: -MPI Rank 2: Validating network. 17 nodes to process in pass 2. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Validating network, final pass. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 2: -MPI Rank 2: Post-processing network complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Created model with 25 nodes on GPU 0. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Training criterion node(s): -MPI Rank 2: 08/16/2016 03:19:23: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Evaluation criterion node(s): -MPI Rank 2: 08/16/2016 03:19:23: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:19:23: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:19:23: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 08/16/2016 03:19:23: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 08/16/2016 03:19:23: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 08/16/2016 03:19:23: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:23: MeanOfFeatures = Mean() -MPI Rank 2: 08/16/2016 03:19:23: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 08/16/2016 03:19:23: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:28: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:28: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:28: Starting minibatch loop. -MPI Rank 2: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.0912s; samplesPerSecond = 7021.2 -MPI Rank 2: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.0956s; samplesPerSecond = 6692.3 -MPI Rank 2: 08/16/2016 03:19:28: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.0836s; samplesPerSecond = 7652.6 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0959s; samplesPerSecond = 6671.7 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.0910s; samplesPerSecond = 7030.3 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.1035s; samplesPerSecond = 6184.1 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.1052s; samplesPerSecond = 6083.4 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.0975s; samplesPerSecond = 6564.4 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0947s; samplesPerSecond = 6758.0 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.0973s; samplesPerSecond = 6577.9 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.0949s; samplesPerSecond = 6744.6 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1008s; samplesPerSecond = 6346.3 -MPI Rank 2: 08/16/2016 03:19:29: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.0908s; samplesPerSecond = 7044.7 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.1112s; samplesPerSecond = 5753.2 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0946s; samplesPerSecond = 6763.8 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.0886s; samplesPerSecond = 7225.6 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.1048s; samplesPerSecond = 6108.6 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0928s; samplesPerSecond = 6895.7 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.0930s; samplesPerSecond = 6882.8 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.0850s; samplesPerSecond = 7533.0 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.0877s; samplesPerSecond = 7301.7 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0880s; samplesPerSecond = 7269.5 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0909s; samplesPerSecond = 7038.5 -MPI Rank 2: 08/16/2016 03:19:30: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0855s; samplesPerSecond = 7482.3 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1141s; samplesPerSecond = 5608.1 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.0974s; samplesPerSecond = 6573.7 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0854s; samplesPerSecond = 7498.0 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1040s; samplesPerSecond = 6154.6 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.1073s; samplesPerSecond = 5966.9 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.0921s; samplesPerSecond = 6948.4 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0959s; samplesPerSecond = 6674.3 -MPI Rank 2: 08/16/2016 03:19:31: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0907s; samplesPerSecond = 7055.5 -MPI Rank 2: 08/16/2016 03:19:31: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.06702s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2089s; samplesPerSecond = 12254.0 -MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1802s; samplesPerSecond = 14210.1 -MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15046.4 -MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.06745115 * 2560; EvalClassificationError = 0.56171875 * 2560; time = 0.1684s; samplesPerSecond = 15206.1 -MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.06705242 * 2560; EvalClassificationError = 0.55976563 * 2560; time = 0.1697s; samplesPerSecond = 15085.8 -MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.00136482 * 2560; EvalClassificationError = 0.54531250 * 2560; time = 0.1754s; samplesPerSecond = 14595.8 -MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.99508116 * 2560; EvalClassificationError = 0.54765625 * 2560; time = 0.1692s; samplesPerSecond = 15129.6 -MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.99964996 * 2560; EvalClassificationError = 0.55507812 * 2560; time = 0.1718s; samplesPerSecond = 14899.1 -MPI Rank 2: 08/16/2016 03:19:33: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.07215304 * 20480; EvalClassificationError = 0.56293945 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42528s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. -MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3569s; samplesPerSecond = 28689.5 -MPI Rank 2: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3228s; samplesPerSecond = 31727.1 -MPI Rank 2: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689913s -MPI Rank 2: 08/16/2016 03:19:34: CNTKCommandTrainEnd: speechTrain -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:34: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:34: __COMPLETED__ -MPI Rank 2: ~MPIWrapper \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/run-test b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/run-test deleted file mode 100755 index 741246016..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/run-test +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -. $TEST_ROOT_DIR/run-test-common - -OriginalTestDir=../../../DNN/Parallel1BitQuantization -ConfigDir=$TEST_DIR/../../../DNN -LogFileName=stderr -Instances=3 -NumCPUThreads=$(threadsPerInstance $Instances) - -(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) -if [ $? != 0 ]; then - echo Error: Baselines must match original test. Copy from $OriginalTestDir. - exit 1 -fi - -# cntkmpirun -cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=HTKDeserializers]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]]" -ExitCode=$? -sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0 -sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1 -sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2 -exit $ExitCode diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml deleted file mode 100644 index 8ee32dece..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml +++ /dev/null @@ -1,38 +0,0 @@ -dataDir: ../../../Data -tags: - # - bvt-s (build_sku == '1bitsgd') and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu'))) - - nightly-s (build_sku == '1bitsgd') - -testCases: - Must train epochs in exactly same order and parameters for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting Epoch {{integer}} - - learning rate per sample = {{float}} - - momentum = {{float}} - - Epochs must be finished with expected results for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Finished Epoch[{{integer}} of {{integer}}] - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0%}} - - learningRatePerSample = {{float,tolerance=0.001%}} - - Per-minibatch training results must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} - - " * {{integer}}; " - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0%}} - - DataParallelSGD training parameters must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting minibatch loop - - DataParallelSGD training - - myRank = {{integer}} - - numNodes = 3 - - numGradientBits = 1 - - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt deleted file mode 100644 index 7ce08e8a8..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt +++ /dev/null @@ -1,613 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3530 @ 2.80GHz - Hardware threads: 4 - Total Memory: 12580404 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (1) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (2) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (0) are in (participating) -MPI Rank 0: 12/15/2016 08:45:01: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 -MPI Rank 0: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 0: 12/15/2016 08:45:01: Using 1 CPU threads. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:01: ############################################################################## -MPI Rank 0: 12/15/2016 08:45:01: # # -MPI Rank 0: 12/15/2016 08:45:01: # speechTrain command (train action) # -MPI Rank 0: 12/15/2016 08:45:01: # # -MPI Rank 0: 12/15/2016 08:45:01: ############################################################################## -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:01: -MPI Rank 0: Creating virgin network. -MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: 12/15/2016 08:45:02: -MPI Rank 0: Model has 25 nodes. Using CPU. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:02: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: 12/15/2016 08:45:02: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:02: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:02: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:45:02: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:45:02: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 12/15/2016 08:45:02: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 12/15/2016 08:45:02: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 12/15/2016 08:45:02: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:02: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:02: MeanOfFeatures = Mean() -MPI Rank 0: 12/15/2016 08:45:02: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 12/15/2016 08:45:02: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:05: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:06: Starting minibatch loop. -MPI Rank 0: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2622s; samplesPerSecond = 2440.9 -MPI Rank 0: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2583s; samplesPerSecond = 2477.8 -MPI Rank 0: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2540s; samplesPerSecond = 2520.0 -MPI Rank 0: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2483s; samplesPerSecond = 2577.9 -MPI Rank 0: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2509s; samplesPerSecond = 2551.2 -MPI Rank 0: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2546s; samplesPerSecond = 2514.0 -MPI Rank 0: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2567s; samplesPerSecond = 2493.6 -MPI Rank 0: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2573s; samplesPerSecond = 2487.3 -MPI Rank 0: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2603s; samplesPerSecond = 2459.0 -MPI Rank 0: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2683s; samplesPerSecond = 2385.0 -MPI Rank 0: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2675s; samplesPerSecond = 2392.1 -MPI Rank 0: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2573s; samplesPerSecond = 2486.9 -MPI Rank 0: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2748s; samplesPerSecond = 2329.1 -MPI Rank 0: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2580s; samplesPerSecond = 2480.5 -MPI Rank 0: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2510s; samplesPerSecond = 2549.4 -MPI Rank 0: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2493s; samplesPerSecond = 2567.7 -MPI Rank 0: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2492s; samplesPerSecond = 2568.6 -MPI Rank 0: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2504s; samplesPerSecond = 2556.3 -MPI Rank 0: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2555s; samplesPerSecond = 2504.4 -MPI Rank 0: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2535s; samplesPerSecond = 2524.9 -MPI Rank 0: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2536s; samplesPerSecond = 2523.8 -MPI Rank 0: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2634s; samplesPerSecond = 2429.6 -MPI Rank 0: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2542s; samplesPerSecond = 2517.6 -MPI Rank 0: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2641s; samplesPerSecond = 2423.0 -MPI Rank 0: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2655s; samplesPerSecond = 2410.5 -MPI Rank 0: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2630s; samplesPerSecond = 2433.9 -MPI Rank 0: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2682s; samplesPerSecond = 2386.6 -MPI Rank 0: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2565s; samplesPerSecond = 2495.5 -MPI Rank 0: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2532s; samplesPerSecond = 2527.9 -MPI Rank 0: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2553s; samplesPerSecond = 2507.0 -MPI Rank 0: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2611s; samplesPerSecond = 2451.3 -MPI Rank 0: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2518s; samplesPerSecond = 2541.8 -MPI Rank 0: 12/15/2016 08:45:14: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.26s -MPI Rank 0: 12/15/2016 08:45:14: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.017159 -MPI Rank 0: Async gradient aggregation wait time: 4e-006 -MPI Rank 0: Actual gradient aggregation time: 0.022209 -MPI Rank 0: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.23258828 * 2304; EvalClassificationError = 0.61414931 * 2304; time = 0.6861s; samplesPerSecond = 3358.2 -MPI Rank 0: Async gradient aggregation wait time: 0.007336 -MPI Rank 0: Actual gradient aggregation time: 0.062504 -MPI Rank 0: Async gradient aggregation wait time: 4e-006 -MPI Rank 0: Actual gradient aggregation time: 0.017296 -MPI Rank 0: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23901091 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.5216s; samplesPerSecond = 4908.2 -MPI Rank 0: Async gradient aggregation wait time: 0.022689 -MPI Rank 0: Actual gradient aggregation time: 0.041876 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.045387 -MPI Rank 0: 12/15/2016 08:45:16: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16822363 * 2560; EvalClassificationError = 0.57773438 * 2560; time = 0.6671s; samplesPerSecond = 3837.3 -MPI Rank 0: Async gradient aggregation wait time: 0.00641 -MPI Rank 0: Actual gradient aggregation time: 0.043876 -MPI Rank 0: Async gradient aggregation wait time: 4e-006 -MPI Rank 0: Actual gradient aggregation time: 0.027536 -MPI Rank 0: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.19927971 * 2560; EvalClassificationError = 0.62187500 * 2560; time = 0.5975s; samplesPerSecond = 4284.5 -MPI Rank 0: Async gradient aggregation wait time: 0.006883 -MPI Rank 0: Actual gradient aggregation time: 0.045305 -MPI Rank 0: Async gradient aggregation wait time: 0.062512 -MPI Rank 0: Actual gradient aggregation time: 0.05298 -MPI Rank 0: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.22075939 * 2560; EvalClassificationError = 0.59648437 * 2560; time = 0.5649s; samplesPerSecond = 4531.5 -MPI Rank 0: Async gradient aggregation wait time: 0.012483 -MPI Rank 0: Actual gradient aggregation time: 0.049937 -MPI Rank 0: Async gradient aggregation wait time: 0.010496 -MPI Rank 0: Actual gradient aggregation time: 0.046163 -MPI Rank 0: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.11227615 * 2560; EvalClassificationError = 0.57382813 * 2560; time = 0.5057s; samplesPerSecond = 5062.7 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.025722 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.018403 -MPI Rank 0: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.17322591 * 2560; EvalClassificationError = 0.61914063 * 2560; time = 0.5979s; samplesPerSecond = 4281.6 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.026228 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.027207 -MPI Rank 0: 12/15/2016 08:45:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13027284 * 2560; EvalClassificationError = 0.60820312 * 2560; time = 0.6650s; samplesPerSecond = 3849.4 -MPI Rank 0: Async gradient aggregation wait time: 0.015012 -MPI Rank 0: Actual gradient aggregation time: 0.024411 -MPI Rank 0: 12/15/2016 08:45:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.18324277 * 20480; EvalClassificationError = 0.59892578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.85012s -MPI Rank 0: 12/15/2016 08:45:19: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.01979 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.017679 -MPI Rank 0: 12/15/2016 08:45:20: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.20597127 * 9216; EvalClassificationError = 0.58593750 * 9216; time = 1.3442s; samplesPerSecond = 6856.0 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.065355 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.066516 -MPI Rank 0: 12/15/2016 08:45:22: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.14626719 * 10240; EvalClassificationError = 0.58886719 * 10240; time = 1.2738s; samplesPerSecond = 8039.0 -MPI Rank 0: 12/15/2016 08:45:22: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.16917041 * 20480; EvalClassificationError = 0.58637695 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.73964s -MPI Rank 0: 12/15/2016 08:45:22: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:22: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:22: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.04443 -MPI Rank 0: Async gradient aggregation wait time: 0.012333 -MPI Rank 0: Actual gradient aggregation time: 0.126445 -MPI Rank 0: 12/15/2016 08:45:23: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99117050 * 9216; EvalClassificationError = 0.54427083 * 9216; time = 1.2882s; samplesPerSecond = 7154.4 -MPI Rank 0: Async gradient aggregation wait time: 0.029068 -MPI Rank 0: Actual gradient aggregation time: 0.108564 -MPI Rank 0: Async gradient aggregation wait time: 0.006866 -MPI Rank 0: Actual gradient aggregation time: 0.123917 -MPI Rank 0: 12/15/2016 08:45:24: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97438950 * 10240; EvalClassificationError = 0.54345703 * 10240; time = 1.3228s; samplesPerSecond = 7741.2 -MPI Rank 0: Async gradient aggregation wait time: 0.022913 -MPI Rank 0: 12/15/2016 08:45:25: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98353069 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.76533s -MPI Rank 0: 12/15/2016 08:45:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:25: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:45:25: __COMPLETED__ -MPI Rank 1: 12/15/2016 08:45:02: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 -MPI Rank 1: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 1: 12/15/2016 08:45:02: Using 1 CPU threads. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: ############################################################################## -MPI Rank 1: 12/15/2016 08:45:02: # # -MPI Rank 1: 12/15/2016 08:45:02: # speechTrain command (train action) # -MPI Rank 1: 12/15/2016 08:45:02: # # -MPI Rank 1: 12/15/2016 08:45:02: ############################################################################## -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: -MPI Rank 1: Creating virgin network. -MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: 12/15/2016 08:45:02: -MPI Rank 1: Model has 25 nodes. Using CPU. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: 12/15/2016 08:45:02: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:45:02: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:45:02: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 12/15/2016 08:45:02: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 12/15/2016 08:45:02: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 12/15/2016 08:45:02: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:02: MeanOfFeatures = Mean() -MPI Rank 1: 12/15/2016 08:45:02: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 12/15/2016 08:45:02: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:05: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:06: Starting minibatch loop. -MPI Rank 1: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2913s; samplesPerSecond = 2197.1 -MPI Rank 1: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2671s; samplesPerSecond = 2396.1 -MPI Rank 1: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2515s; samplesPerSecond = 2544.4 -MPI Rank 1: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2473s; samplesPerSecond = 2588.5 -MPI Rank 1: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2596s; samplesPerSecond = 2465.4 -MPI Rank 1: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2641s; samplesPerSecond = 2423.6 -MPI Rank 1: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2588s; samplesPerSecond = 2472.9 -MPI Rank 1: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2569s; samplesPerSecond = 2491.1 -MPI Rank 1: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2589s; samplesPerSecond = 2471.6 -MPI Rank 1: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2676s; samplesPerSecond = 2391.9 -MPI Rank 1: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2688s; samplesPerSecond = 2381.2 -MPI Rank 1: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2586s; samplesPerSecond = 2475.0 -MPI Rank 1: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2710s; samplesPerSecond = 2361.7 -MPI Rank 1: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2622s; samplesPerSecond = 2440.6 -MPI Rank 1: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2595s; samplesPerSecond = 2466.3 -MPI Rank 1: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2516s; samplesPerSecond = 2543.9 -MPI Rank 1: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2499s; samplesPerSecond = 2560.6 -MPI Rank 1: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2585s; samplesPerSecond = 2476.0 -MPI Rank 1: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2595s; samplesPerSecond = 2466.6 -MPI Rank 1: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2609s; samplesPerSecond = 2452.9 -MPI Rank 1: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2680s; samplesPerSecond = 2387.9 -MPI Rank 1: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2573s; samplesPerSecond = 2487.7 -MPI Rank 1: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2515s; samplesPerSecond = 2544.7 -MPI Rank 1: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2533s; samplesPerSecond = 2526.8 -MPI Rank 1: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2568s; samplesPerSecond = 2492.4 -MPI Rank 1: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2552s; samplesPerSecond = 2507.7 -MPI Rank 1: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2580s; samplesPerSecond = 2480.8 -MPI Rank 1: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2551s; samplesPerSecond = 2508.4 -MPI Rank 1: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2533s; samplesPerSecond = 2526.6 -MPI Rank 1: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2562s; samplesPerSecond = 2497.6 -MPI Rank 1: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2600s; samplesPerSecond = 2461.6 -MPI Rank 1: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2475s; samplesPerSecond = 2585.6 -MPI Rank 1: 12/15/2016 08:45:14: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.30884s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.042853 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.025159 -MPI Rank 1: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.23258828 * 2304; EvalClassificationError = 0.61414931 * 2304; time = 0.7137s; samplesPerSecond = 3228.3 -MPI Rank 1: Async gradient aggregation wait time: 0.011636 -MPI Rank 1: Actual gradient aggregation time: 0.05291 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.06182 -MPI Rank 1: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23901091 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.5390s; samplesPerSecond = 4749.4 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.021365 -MPI Rank 1: Async gradient aggregation wait time: 0.03829 -MPI Rank 1: Actual gradient aggregation time: 0.067366 -MPI Rank 1: 12/15/2016 08:45:16: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16822363 * 2560; EvalClassificationError = 0.57773438 * 2560; time = 0.6390s; samplesPerSecond = 4006.2 -MPI Rank 1: Async gradient aggregation wait time: 0.016716 -MPI Rank 1: Actual gradient aggregation time: 0.046773 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.035357 -MPI Rank 1: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.19927971 * 2560; EvalClassificationError = 0.62187500 * 2560; time = 0.6239s; samplesPerSecond = 4103.2 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.01908 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.019362 -MPI Rank 1: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.22075939 * 2560; EvalClassificationError = 0.59648437 * 2560; time = 0.5690s; samplesPerSecond = 4499.2 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.019096 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.028039 -MPI Rank 1: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.11227615 * 2560; EvalClassificationError = 0.57382813 * 2560; time = 0.4738s; samplesPerSecond = 5403.2 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.051405 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.050967 -MPI Rank 1: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.17322591 * 2560; EvalClassificationError = 0.61914063 * 2560; time = 0.6206s; samplesPerSecond = 4125.0 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.023017 -MPI Rank 1: Async gradient aggregation wait time: 0.03727 -MPI Rank 1: Actual gradient aggregation time: 0.037849 -MPI Rank 1: 12/15/2016 08:45:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13027284 * 2560; EvalClassificationError = 0.60820312 * 2560; time = 0.6112s; samplesPerSecond = 4188.6 -MPI Rank 1: Async gradient aggregation wait time: 0.04738 -MPI Rank 1: Actual gradient aggregation time: 0.01977 -MPI Rank 1: 12/15/2016 08:45:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.18324277 * 20480; EvalClassificationError = 0.59892578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.86625s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.048671 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.162308 -MPI Rank 1: 12/15/2016 08:45:20: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.20597127 * 9216; EvalClassificationError = 0.58593750 * 9216; time = 1.4064s; samplesPerSecond = 6553.0 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.029329 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.028563 -MPI Rank 1: 12/15/2016 08:45:22: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.14626719 * 10240; EvalClassificationError = 0.58886719 * 10240; time = 1.2755s; samplesPerSecond = 8028.1 -MPI Rank 1: 12/15/2016 08:45:22: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.16917041 * 20480; EvalClassificationError = 0.58637695 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.74129s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:22: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:22: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.078784 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.020759 -MPI Rank 1: 12/15/2016 08:45:23: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99117050 * 9216; EvalClassificationError = 0.54427083 * 9216; time = 1.3985s; samplesPerSecond = 6589.8 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.022602 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.019065 -MPI Rank 1: 12/15/2016 08:45:25: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97438950 * 10240; EvalClassificationError = 0.54345703 * 10240; time = 1.3151s; samplesPerSecond = 7786.7 -MPI Rank 1: Async gradient aggregation wait time: 0.022045 -MPI Rank 1: 12/15/2016 08:45:25: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98353069 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.76637s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:25: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:45:25: __COMPLETED__ -MPI Rank 2: 12/15/2016 08:45:02: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:31:19) on cntk-muc03 at 2016/12/15 08:45:01 -MPI Rank 2: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215084323.697388\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 2: 12/15/2016 08:45:02: Using 1 CPU threads. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:02: ############################################################################## -MPI Rank 2: 12/15/2016 08:45:02: # # -MPI Rank 2: 12/15/2016 08:45:02: # speechTrain command (train action) # -MPI Rank 2: 12/15/2016 08:45:02: # # -MPI Rank 2: 12/15/2016 08:45:02: ############################################################################## -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:02: -MPI Rank 2: Creating virgin network. -MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: 12/15/2016 08:45:03: -MPI Rank 2: Model has 25 nodes. Using CPU. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:03: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: 12/15/2016 08:45:03: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:03: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:03: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:45:03: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:45:03: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 12/15/2016 08:45:03: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 12/15/2016 08:45:03: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 12/15/2016 08:45:03: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: Initializing dataParallelSGD for 1-bit quantization. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:03: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:03: MeanOfFeatures = Mean() -MPI Rank 2: 12/15/2016 08:45:03: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 12/15/2016 08:45:03: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:06: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:06: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:06: Starting minibatch loop. -MPI Rank 2: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2718s; samplesPerSecond = 2354.8 -MPI Rank 2: 12/15/2016 08:45:06: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2665s; samplesPerSecond = 2401.4 -MPI Rank 2: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2495s; samplesPerSecond = 2564.7 -MPI Rank 2: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2478s; samplesPerSecond = 2583.0 -MPI Rank 2: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2617s; samplesPerSecond = 2445.2 -MPI Rank 2: 12/15/2016 08:45:07: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2624s; samplesPerSecond = 2439.0 -MPI Rank 2: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2531s; samplesPerSecond = 2529.1 -MPI Rank 2: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2515s; samplesPerSecond = 2545.2 -MPI Rank 2: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2510s; samplesPerSecond = 2549.4 -MPI Rank 2: 12/15/2016 08:45:08: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2601s; samplesPerSecond = 2460.8 -MPI Rank 2: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2549s; samplesPerSecond = 2511.1 -MPI Rank 2: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2507s; samplesPerSecond = 2553.2 -MPI Rank 2: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2712s; samplesPerSecond = 2359.7 -MPI Rank 2: 12/15/2016 08:45:09: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2609s; samplesPerSecond = 2453.0 -MPI Rank 2: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2888s; samplesPerSecond = 2216.0 -MPI Rank 2: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2517s; samplesPerSecond = 2543.0 -MPI Rank 2: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2497s; samplesPerSecond = 2563.3 -MPI Rank 2: 12/15/2016 08:45:10: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2535s; samplesPerSecond = 2524.7 -MPI Rank 2: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2557s; samplesPerSecond = 2503.3 -MPI Rank 2: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2673s; samplesPerSecond = 2394.1 -MPI Rank 2: 12/15/2016 08:45:11: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2666s; samplesPerSecond = 2401.0 -MPI Rank 2: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2671s; samplesPerSecond = 2396.5 -MPI Rank 2: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2558s; samplesPerSecond = 2502.1 -MPI Rank 2: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2641s; samplesPerSecond = 2423.5 -MPI Rank 2: 12/15/2016 08:45:12: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2622s; samplesPerSecond = 2441.3 -MPI Rank 2: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2661s; samplesPerSecond = 2405.1 -MPI Rank 2: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2646s; samplesPerSecond = 2418.5 -MPI Rank 2: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2509s; samplesPerSecond = 2550.4 -MPI Rank 2: 12/15/2016 08:45:13: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2504s; samplesPerSecond = 2555.4 -MPI Rank 2: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2533s; samplesPerSecond = 2527.0 -MPI Rank 2: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2514s; samplesPerSecond = 2545.2 -MPI Rank 2: 12/15/2016 08:45:14: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2466s; samplesPerSecond = 2594.9 -MPI Rank 2: 12/15/2016 08:45:14: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.29154s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.045254 -MPI Rank 2: Async gradient aggregation wait time: 0.01563 -MPI Rank 2: Actual gradient aggregation time: 0.034825 -MPI Rank 2: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.23258828 * 2304; EvalClassificationError = 0.61414931 * 2304; time = 0.7033s; samplesPerSecond = 3275.8 -MPI Rank 2: Async gradient aggregation wait time: 0.025709 -MPI Rank 2: Actual gradient aggregation time: 0.062272 -MPI Rank 2: Async gradient aggregation wait time: 0.008646 -MPI Rank 2: Actual gradient aggregation time: 0.074517 -MPI Rank 2: 12/15/2016 08:45:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23901091 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.5417s; samplesPerSecond = 4725.6 -MPI Rank 2: Async gradient aggregation wait time: 0.005925 -MPI Rank 2: Actual gradient aggregation time: 0.04346 -MPI Rank 2: Async gradient aggregation wait time: 0.058485 -MPI Rank 2: Actual gradient aggregation time: 0.06787 -MPI Rank 2: 12/15/2016 08:45:16: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16822363 * 2560; EvalClassificationError = 0.57773438 * 2560; time = 0.6751s; samplesPerSecond = 3791.9 -MPI Rank 2: Async gradient aggregation wait time: 4e-006 -MPI Rank 2: Actual gradient aggregation time: 0.038691 -MPI Rank 2: Async gradient aggregation wait time: 0.014519 -MPI Rank 2: Actual gradient aggregation time: 0.044701 -MPI Rank 2: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.19927971 * 2560; EvalClassificationError = 0.62187500 * 2560; time = 0.5667s; samplesPerSecond = 4517.2 -MPI Rank 2: Async gradient aggregation wait time: 0.026562 -MPI Rank 2: Actual gradient aggregation time: 0.04628 -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.048649 -MPI Rank 2: 12/15/2016 08:45:17: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.22075939 * 2560; EvalClassificationError = 0.59648437 * 2560; time = 0.5674s; samplesPerSecond = 4511.8 -MPI Rank 2: Async gradient aggregation wait time: 0.026443 -MPI Rank 2: Actual gradient aggregation time: 0.049981 -MPI Rank 2: Async gradient aggregation wait time: 0.017769 -MPI Rank 2: Actual gradient aggregation time: 0.037914 -MPI Rank 2: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.11227615 * 2560; EvalClassificationError = 0.57382813 * 2560; time = 0.5163s; samplesPerSecond = 4958.2 -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.054407 -MPI Rank 2: Async gradient aggregation wait time: 0.009165 -MPI Rank 2: Actual gradient aggregation time: 0.06387 -MPI Rank 2: 12/15/2016 08:45:18: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.17322591 * 2560; EvalClassificationError = 0.61914063 * 2560; time = 0.5886s; samplesPerSecond = 4349.3 -MPI Rank 2: Async gradient aggregation wait time: 0.008188 -MPI Rank 2: Actual gradient aggregation time: 0.039783 -MPI Rank 2: Async gradient aggregation wait time: 0.140833 -MPI Rank 2: Actual gradient aggregation time: 0.047561 -MPI Rank 2: 12/15/2016 08:45:19: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.13027284 * 2560; EvalClassificationError = 0.60820312 * 2560; time = 0.6432s; samplesPerSecond = 3979.9 -MPI Rank 2: Async gradient aggregation wait time: 0.035015 -MPI Rank 2: Actual gradient aggregation time: 0.022108 -MPI Rank 2: 12/15/2016 08:45:19: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.18324277 * 20480; EvalClassificationError = 0.59892578 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=4.86587s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.058171 -MPI Rank 2: Actual gradient aggregation time: 0.126685 -MPI Rank 2: Async gradient aggregation wait time: 0.054589 -MPI Rank 2: Actual gradient aggregation time: 0.248473 -MPI Rank 2: 12/15/2016 08:45:20: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.20597127 * 9216; EvalClassificationError = 0.58593750 * 9216; time = 1.3300s; samplesPerSecond = 6929.2 -MPI Rank 2: Async gradient aggregation wait time: 0.030365 -MPI Rank 2: Actual gradient aggregation time: 0.141699 -MPI Rank 2: Async gradient aggregation wait time: 0.04912 -MPI Rank 2: Actual gradient aggregation time: 0.131064 -MPI Rank 2: 12/15/2016 08:45:22: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.14626719 * 10240; EvalClassificationError = 0.58886719 * 10240; time = 1.2240s; samplesPerSecond = 8366.0 -MPI Rank 2: 12/15/2016 08:45:22: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.16917041 * 20480; EvalClassificationError = 0.58637695 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.72378s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:22: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:22: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.129446 -MPI Rank 2: Async gradient aggregation wait time: 0.040623 -MPI Rank 2: Actual gradient aggregation time: 0.126456 -MPI Rank 2: 12/15/2016 08:45:23: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.99117050 * 9216; EvalClassificationError = 0.54427083 * 9216; time = 1.2583s; samplesPerSecond = 7324.0 -MPI Rank 2: Async gradient aggregation wait time: 0.054145 -MPI Rank 2: Actual gradient aggregation time: 0.120512 -MPI Rank 2: Async gradient aggregation wait time: 0.044203 -MPI Rank 2: Actual gradient aggregation time: 0.115423 -MPI Rank 2: 12/15/2016 08:45:24: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97438950 * 10240; EvalClassificationError = 0.54345703 * 10240; time = 1.3223s; samplesPerSecond = 7744.0 -MPI Rank 2: Async gradient aggregation wait time: 0.019428 -MPI Rank 2: 12/15/2016 08:45:25: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.98353069 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.73407s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:25: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:45:25: __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt deleted file mode 100644 index a66d30a6c..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt +++ /dev/null @@ -1,1942 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz - Hardware threads: 24 - Total Memory: 268381192 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 03:09:16 - Last modified date: Fri Aug 12 05:28:23 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: yes - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool1 - Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 -mpihelper: we are cog 1 in a gearbox of 3 -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -MPI Rank 0: 08/16/2016 03:20:39: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 08/16/2016 03:20:39: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:20:39: Build info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:39: Built time: Aug 16 2016 03:09:16 -MPI Rank 0: 08/16/2016 03:20:39: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 0: 08/16/2016 03:20:39: Build type: Release -MPI Rank 0: 08/16/2016 03:20:39: Build target: GPU -MPI Rank 0: 08/16/2016 03:20:39: With 1bit-SGD: yes -MPI Rank 0: 08/16/2016 03:20:39: Math lib: mkl -MPI Rank 0: 08/16/2016 03:20:39: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 08/16/2016 03:20:39: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 08/16/2016 03:20:39: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 08/16/2016 03:20:39: Build Branch: HEAD -MPI Rank 0: 08/16/2016 03:20:39: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 0: 08/16/2016 03:20:39: Built by svcphil on Philly-Pool1 -MPI Rank 0: 08/16/2016 03:20:39: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 08/16/2016 03:20:39: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:20:43: GPU info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:20:43: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 0: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: Running on DPHAIM-24 at 2016/08/16 03:20:43 -MPI Rank 0: 08/16/2016 03:20:43: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:20:43: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "$DataDir$/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=8 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:20:43: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = 0 -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=8 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:deviceId=0 -MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 08/16/2016 03:20:43: Commands: speechTrain -MPI Rank 0: 08/16/2016 03:20:43: Precision = "double" -MPI Rank 0: 08/16/2016 03:20:43: Using 8 CPU threads. -MPI Rank 0: 08/16/2016 03:20:43: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 08/16/2016 03:20:43: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 08/16/2016 03:20:43: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: ############################################################################## -MPI Rank 0: 08/16/2016 03:20:43: # # -MPI Rank 0: 08/16/2016 03:20:43: # Action "train" # -MPI Rank 0: 08/16/2016 03:20:43: # # -MPI Rank 0: 08/16/2016 03:20:43: ############################################################################## -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: CNTKCommandTrainBegin: speechTrain -MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:43: Creating virgin network. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: -MPI Rank 0: Post-processing network... -MPI Rank 0: -MPI Rank 0: 7 roots: -MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 0: EvalClassificationError = ClassificationError() -MPI Rank 0: InvStdOfFeatures = InvStdDev() -MPI Rank 0: MeanOfFeatures = Mean() -MPI Rank 0: PosteriorProb = Softmax() -MPI Rank 0: Prior = Mean() -MPI Rank 0: ScaledLogLikelihood = Minus() -MPI Rank 0: -MPI Rank 0: Validating network. 25 nodes to process in pass 1. -MPI Rank 0: -MPI Rank 0: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 0: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 0: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 0: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 0: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 0: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 0: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 0: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 0: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 0: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 0: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 0: -MPI Rank 0: Validating network. 17 nodes to process in pass 2. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Validating network, final pass. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 0: -MPI Rank 0: Post-processing network complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Created model with 25 nodes on GPU 0. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Training criterion node(s): -MPI Rank 0: 08/16/2016 03:20:44: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Evaluation criterion node(s): -MPI Rank 0: 08/16/2016 03:20:44: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:20:44: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:20:44: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 08/16/2016 03:20:44: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 08/16/2016 03:20:44: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 08/16/2016 03:20:44: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:44: MeanOfFeatures = Mean() -MPI Rank 0: 08/16/2016 03:20:44: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 08/16/2016 03:20:44: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:50: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:50: Starting minibatch loop. -MPI Rank 0: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.0848s; samplesPerSecond = 7548.6 -MPI Rank 0: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.0856s; samplesPerSecond = 7479.1 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.0987s; samplesPerSecond = 6482.7 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1003s; samplesPerSecond = 6382.4 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.0971s; samplesPerSecond = 6589.0 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.1170s; samplesPerSecond = 5469.0 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.0966s; samplesPerSecond = 6624.2 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1007s; samplesPerSecond = 6355.6 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0970s; samplesPerSecond = 6595.0 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.0989s; samplesPerSecond = 6474.5 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.0895s; samplesPerSecond = 7151.2 -MPI Rank 0: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.0982s; samplesPerSecond = 6515.7 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.1044s; samplesPerSecond = 6130.0 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.0895s; samplesPerSecond = 7150.4 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.1063s; samplesPerSecond = 6020.5 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1047s; samplesPerSecond = 6114.2 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.0983s; samplesPerSecond = 6508.9 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.1114s; samplesPerSecond = 5745.5 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.0919s; samplesPerSecond = 6961.3 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.0783s; samplesPerSecond = 8171.8 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.0976s; samplesPerSecond = 6558.1 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0989s; samplesPerSecond = 6469.9 -MPI Rank 0: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1018s; samplesPerSecond = 6287.0 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0913s; samplesPerSecond = 7012.0 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1004s; samplesPerSecond = 6375.1 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1030s; samplesPerSecond = 6212.9 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.1058s; samplesPerSecond = 6051.7 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1072s; samplesPerSecond = 5972.8 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.0980s; samplesPerSecond = 6529.9 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.0954s; samplesPerSecond = 6710.5 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0957s; samplesPerSecond = 6684.5 -MPI Rank 0: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0673s; samplesPerSecond = 9509.2 -MPI Rank 0: 08/16/2016 03:20:53: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.12839s -MPI Rank 0: 08/16/2016 03:20:53: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.021385 -MPI Rank 0: Async gradient aggregation wait time: 0.006373 -MPI Rank 0: Actual gradient aggregation time: 0.017647 -MPI Rank 0: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.22369214 * 2304; EvalClassificationError = 0.61111111 * 2304; time = 0.1981s; samplesPerSecond = 11632.4 -MPI Rank 0: Async gradient aggregation wait time: 0.009233 -MPI Rank 0: Actual gradient aggregation time: 0.018749 -MPI Rank 0: Async gradient aggregation wait time: 0.000877 -MPI Rank 0: Actual gradient aggregation time: 0.016851 -MPI Rank 0: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23347641 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.1783s; samplesPerSecond = 14361.8 -MPI Rank 0: Async gradient aggregation wait time: 0.004752 -MPI Rank 0: Actual gradient aggregation time: 0.016704 -MPI Rank 0: Async gradient aggregation wait time: 0.00476 -MPI Rank 0: Actual gradient aggregation time: 0.01635 -MPI Rank 0: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16589380 * 2560; EvalClassificationError = 0.57617188 * 2560; time = 0.1731s; samplesPerSecond = 14792.0 -MPI Rank 0: Async gradient aggregation wait time: 0.004797 -MPI Rank 0: Actual gradient aggregation time: 0.01733 -MPI Rank 0: Async gradient aggregation wait time: 0.007635 -MPI Rank 0: Actual gradient aggregation time: 0.017449 -MPI Rank 0: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.17067204 * 2560; EvalClassificationError = 0.60664063 * 2560; time = 0.1734s; samplesPerSecond = 14762.2 -MPI Rank 0: Async gradient aggregation wait time: 0.005846 -MPI Rank 0: Actual gradient aggregation time: 0.017083 -MPI Rank 0: Async gradient aggregation wait time: 0.004504 -MPI Rank 0: Actual gradient aggregation time: 0.016877 -MPI Rank 0: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.18189249 * 2560; EvalClassificationError = 0.58945313 * 2560; time = 0.1711s; samplesPerSecond = 14959.0 -MPI Rank 0: Async gradient aggregation wait time: 0.005357 -MPI Rank 0: Actual gradient aggregation time: 0.016769 -MPI Rank 0: Async gradient aggregation wait time: 0.007533 -MPI Rank 0: Actual gradient aggregation time: 0.016959 -MPI Rank 0: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.08724350 * 2560; EvalClassificationError = 0.56562500 * 2560; time = 0.1716s; samplesPerSecond = 14915.7 -MPI Rank 0: Async gradient aggregation wait time: 0.00462 -MPI Rank 0: Actual gradient aggregation time: 0.015866 -MPI Rank 0: Async gradient aggregation wait time: 0.00912 -MPI Rank 0: Actual gradient aggregation time: 0.017219 -MPI Rank 0: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09218108 * 2560; EvalClassificationError = 0.59179688 * 2560; time = 0.1732s; samplesPerSecond = 14777.4 -MPI Rank 0: Async gradient aggregation wait time: 0.007759 -MPI Rank 0: Actual gradient aggregation time: 0.016693 -MPI Rank 0: Async gradient aggregation wait time: 0.008586 -MPI Rank 0: Actual gradient aggregation time: 0.016108 -MPI Rank 0: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10191157 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.1678s; samplesPerSecond = 15254.6 -MPI Rank 0: Async gradient aggregation wait time: 0.006567 -MPI Rank 0: Actual gradient aggregation time: 0.006809 -MPI Rank 0: 08/16/2016 03:20:55: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.15620068 * 20480; EvalClassificationError = 0.58857422 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42703s -MPI Rank 0: 08/16/2016 03:20:55: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.014735 -MPI Rank 0: Actual gradient aggregation time: 0.03433 -MPI Rank 0: Async gradient aggregation wait time: 0.004733 -MPI Rank 0: Actual gradient aggregation time: 0.029133 -MPI Rank 0: 08/16/2016 03:20:55: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.11767394 * 9216; EvalClassificationError = 0.56510417 * 9216; time = 0.3403s; samplesPerSecond = 27084.1 -MPI Rank 0: Async gradient aggregation wait time: 0.015933 -MPI Rank 0: Actual gradient aggregation time: 0.030693 -MPI Rank 0: Async gradient aggregation wait time: 0.015901 -MPI Rank 0: Actual gradient aggregation time: 0.028981 -MPI Rank 0: 08/16/2016 03:20:56: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.08282316 * 10240; EvalClassificationError = 0.56914062 * 10240; time = 0.3115s; samplesPerSecond = 32875.8 -MPI Rank 0: 08/16/2016 03:20:56: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09667113 * 20480; EvalClassificationError = 0.56689453 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.677633s -MPI Rank 0: 08/16/2016 03:20:56: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.004776 -MPI Rank 0: Actual gradient aggregation time: 0.028351 -MPI Rank 0: Async gradient aggregation wait time: 0.008151 -MPI Rank 0: Actual gradient aggregation time: 0.028006 -MPI Rank 0: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.98353176 * 9216; EvalClassificationError = 0.53982205 * 9216; time = 0.3011s; samplesPerSecond = 30607.6 -MPI Rank 0: Async gradient aggregation wait time: 0.014923 -MPI Rank 0: Actual gradient aggregation time: 0.029642 -MPI Rank 0: Async gradient aggregation wait time: 0.005029 -MPI Rank 0: Actual gradient aggregation time: 0.028009 -MPI Rank 0: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96715780 * 10240; EvalClassificationError = 0.53369141 * 10240; time = 0.2962s; samplesPerSecond = 34566.3 -MPI Rank 0: Async gradient aggregation wait time: 0.00696 -MPI Rank 0: 08/16/2016 03:20:56: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.97591825 * 20480; EvalClassificationError = 0.53642578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=0.616271s -MPI Rank 0: 08/16/2016 03:20:56: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 08/16/2016 03:20:56: CNTKCommandTrainEnd: speechTrain -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:56: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:57: __COMPLETED__ -MPI Rank 0: ~MPIWrapper -MPI Rank 1: 08/16/2016 03:20:40: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 08/16/2016 03:20:40: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:20:40: Build info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:40: Built time: Aug 16 2016 03:09:16 -MPI Rank 1: 08/16/2016 03:20:40: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 1: 08/16/2016 03:20:40: Build type: Release -MPI Rank 1: 08/16/2016 03:20:40: Build target: GPU -MPI Rank 1: 08/16/2016 03:20:40: With 1bit-SGD: yes -MPI Rank 1: 08/16/2016 03:20:40: Math lib: mkl -MPI Rank 1: 08/16/2016 03:20:40: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 08/16/2016 03:20:40: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 08/16/2016 03:20:40: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 08/16/2016 03:20:40: Build Branch: HEAD -MPI Rank 1: 08/16/2016 03:20:40: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 1: 08/16/2016 03:20:40: Built by svcphil on Philly-Pool1 -MPI Rank 1: 08/16/2016 03:20:40: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 08/16/2016 03:20:40: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:20:43: GPU info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:20:43: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 1: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:43: Running on DPHAIM-24 at 2016/08/16 03:20:43 -MPI Rank 1: 08/16/2016 03:20:43: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:20:43: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "$DataDir$/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=8 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:20:43: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = 0 -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=8 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:44: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:44: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:deviceId=0 -MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 08/16/2016 03:20:44: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 08/16/2016 03:20:44: Commands: speechTrain -MPI Rank 1: 08/16/2016 03:20:44: Precision = "double" -MPI Rank 1: 08/16/2016 03:20:44: Using 8 CPU threads. -MPI Rank 1: 08/16/2016 03:20:44: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 08/16/2016 03:20:44: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 08/16/2016 03:20:44: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:44: ############################################################################## -MPI Rank 1: 08/16/2016 03:20:44: # # -MPI Rank 1: 08/16/2016 03:20:44: # Action "train" # -MPI Rank 1: 08/16/2016 03:20:44: # # -MPI Rank 1: 08/16/2016 03:20:44: ############################################################################## -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:44: CNTKCommandTrainBegin: speechTrain -MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:44: Creating virgin network. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: -MPI Rank 1: Post-processing network... -MPI Rank 1: -MPI Rank 1: 7 roots: -MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 1: EvalClassificationError = ClassificationError() -MPI Rank 1: InvStdOfFeatures = InvStdDev() -MPI Rank 1: MeanOfFeatures = Mean() -MPI Rank 1: PosteriorProb = Softmax() -MPI Rank 1: Prior = Mean() -MPI Rank 1: ScaledLogLikelihood = Minus() -MPI Rank 1: -MPI Rank 1: Validating network. 25 nodes to process in pass 1. -MPI Rank 1: -MPI Rank 1: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 1: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 1: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 1: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 1: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 1: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 1: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 1: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 1: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 1: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 1: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 1: -MPI Rank 1: Validating network. 17 nodes to process in pass 2. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Validating network, final pass. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 1: -MPI Rank 1: Post-processing network complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Created model with 25 nodes on GPU 0. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Training criterion node(s): -MPI Rank 1: 08/16/2016 03:20:45: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Evaluation criterion node(s): -MPI Rank 1: 08/16/2016 03:20:45: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:20:45: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:20:45: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 08/16/2016 03:20:45: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 08/16/2016 03:20:45: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 08/16/2016 03:20:45: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:45: MeanOfFeatures = Mean() -MPI Rank 1: 08/16/2016 03:20:45: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 08/16/2016 03:20:45: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:50: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:50: Starting minibatch loop. -MPI Rank 1: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1258s; samplesPerSecond = 5085.8 -MPI Rank 1: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1021s; samplesPerSecond = 6269.4 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.0950s; samplesPerSecond = 6739.7 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1003s; samplesPerSecond = 6378.9 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.0998s; samplesPerSecond = 6411.5 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.0817s; samplesPerSecond = 7830.8 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.1018s; samplesPerSecond = 6285.0 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1112s; samplesPerSecond = 5754.3 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1121s; samplesPerSecond = 5706.8 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1090s; samplesPerSecond = 5870.2 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.0955s; samplesPerSecond = 6702.7 -MPI Rank 1: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1005s; samplesPerSecond = 6370.6 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.1134s; samplesPerSecond = 5644.0 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.0977s; samplesPerSecond = 6548.3 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0888s; samplesPerSecond = 7204.6 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1079s; samplesPerSecond = 5931.9 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.0987s; samplesPerSecond = 6483.4 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0913s; samplesPerSecond = 7009.7 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.1129s; samplesPerSecond = 5668.2 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.0997s; samplesPerSecond = 6422.2 -MPI Rank 1: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.0989s; samplesPerSecond = 6472.4 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1053s; samplesPerSecond = 6077.0 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1044s; samplesPerSecond = 6129.0 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0970s; samplesPerSecond = 6597.4 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0917s; samplesPerSecond = 6982.9 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1018s; samplesPerSecond = 6289.2 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0895s; samplesPerSecond = 7150.7 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.0979s; samplesPerSecond = 6540.5 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.0984s; samplesPerSecond = 6502.9 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.1055s; samplesPerSecond = 6066.1 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0707s; samplesPerSecond = 9058.1 -MPI Rank 1: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0399s; samplesPerSecond = 16044.1 -MPI Rank 1: 08/16/2016 03:20:53: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.16512s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.016814 -MPI Rank 1: Async gradient aggregation wait time: 0.004995 -MPI Rank 1: Actual gradient aggregation time: 0.018553 -MPI Rank 1: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.22369214 * 2304; EvalClassificationError = 0.61111111 * 2304; time = 0.1969s; samplesPerSecond = 11701.9 -MPI Rank 1: Async gradient aggregation wait time: 0.006765 -MPI Rank 1: Actual gradient aggregation time: 0.017839 -MPI Rank 1: Async gradient aggregation wait time: 0.012349 -MPI Rank 1: Actual gradient aggregation time: 0.016538 -MPI Rank 1: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23347641 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.1777s; samplesPerSecond = 14408.1 -MPI Rank 1: Async gradient aggregation wait time: 0.0074 -MPI Rank 1: Actual gradient aggregation time: 0.016417 -MPI Rank 1: Async gradient aggregation wait time: 0.011519 -MPI Rank 1: Actual gradient aggregation time: 0.016373 -MPI Rank 1: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16589380 * 2560; EvalClassificationError = 0.57617188 * 2560; time = 0.1734s; samplesPerSecond = 14765.1 -MPI Rank 1: Async gradient aggregation wait time: 0.008701 -MPI Rank 1: Actual gradient aggregation time: 0.017605 -MPI Rank 1: Async gradient aggregation wait time: 0.003101 -MPI Rank 1: Actual gradient aggregation time: 0.017339 -MPI Rank 1: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.17067204 * 2560; EvalClassificationError = 0.60664063 * 2560; time = 0.1734s; samplesPerSecond = 14766.9 -MPI Rank 1: Async gradient aggregation wait time: 0.007955 -MPI Rank 1: Actual gradient aggregation time: 0.017248 -MPI Rank 1: Async gradient aggregation wait time: 0.006691 -MPI Rank 1: Actual gradient aggregation time: 0.016928 -MPI Rank 1: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.18189249 * 2560; EvalClassificationError = 0.58945313 * 2560; time = 0.1709s; samplesPerSecond = 14979.9 -MPI Rank 1: Async gradient aggregation wait time: 0.004503 -MPI Rank 1: Actual gradient aggregation time: 0.016657 -MPI Rank 1: Async gradient aggregation wait time: 0.011371 -MPI Rank 1: Actual gradient aggregation time: 0.017292 -MPI Rank 1: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.08724350 * 2560; EvalClassificationError = 0.56562500 * 2560; time = 0.1717s; samplesPerSecond = 14909.0 -MPI Rank 1: Async gradient aggregation wait time: 0.008425 -MPI Rank 1: Actual gradient aggregation time: 0.015857 -MPI Rank 1: Async gradient aggregation wait time: 0.004495 -MPI Rank 1: Actual gradient aggregation time: 0.017283 -MPI Rank 1: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09218108 * 2560; EvalClassificationError = 0.59179688 * 2560; time = 0.1735s; samplesPerSecond = 14752.4 -MPI Rank 1: Async gradient aggregation wait time: 0.002782 -MPI Rank 1: Actual gradient aggregation time: 0.017143 -MPI Rank 1: Async gradient aggregation wait time: 0.006964 -MPI Rank 1: Actual gradient aggregation time: 0.016254 -MPI Rank 1: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10191157 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.1672s; samplesPerSecond = 15314.9 -MPI Rank 1: Async gradient aggregation wait time: 0.007214 -MPI Rank 1: Actual gradient aggregation time: 0.006841 -MPI Rank 1: 08/16/2016 03:20:55: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.15620068 * 20480; EvalClassificationError = 0.58857422 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42722s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.010824 -MPI Rank 1: Actual gradient aggregation time: 0.034649 -MPI Rank 1: Async gradient aggregation wait time: 0.018618 -MPI Rank 1: Actual gradient aggregation time: 0.02933 -MPI Rank 1: 08/16/2016 03:20:55: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.11767394 * 9216; EvalClassificationError = 0.56510417 * 9216; time = 0.3374s; samplesPerSecond = 27316.2 -MPI Rank 1: Async gradient aggregation wait time: 0.007969 -MPI Rank 1: Actual gradient aggregation time: 0.030869 -MPI Rank 1: Async gradient aggregation wait time: 0.003987 -MPI Rank 1: Actual gradient aggregation time: 0.031625 -MPI Rank 1: 08/16/2016 03:20:56: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.08282316 * 10240; EvalClassificationError = 0.56914062 * 10240; time = 0.3192s; samplesPerSecond = 32083.2 -MPI Rank 1: 08/16/2016 03:20:56: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09667113 * 20480; EvalClassificationError = 0.56689453 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.678136s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.006331 -MPI Rank 1: Actual gradient aggregation time: 0.028676 -MPI Rank 1: Async gradient aggregation wait time: 0.007827 -MPI Rank 1: Actual gradient aggregation time: 0.028017 -MPI Rank 1: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.98353176 * 9216; EvalClassificationError = 0.53982205 * 9216; time = 0.2977s; samplesPerSecond = 30954.9 -MPI Rank 1: Async gradient aggregation wait time: 0.007729 -MPI Rank 1: Actual gradient aggregation time: 0.029894 -MPI Rank 1: Async gradient aggregation wait time: 0.016801 -MPI Rank 1: Actual gradient aggregation time: 0.028078 -MPI Rank 1: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96715780 * 10240; EvalClassificationError = 0.53369141 * 10240; time = 0.2965s; samplesPerSecond = 34541.1 -MPI Rank 1: Async gradient aggregation wait time: 0.006883 -MPI Rank 1: 08/16/2016 03:20:56: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.97591825 * 20480; EvalClassificationError = 0.53642578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=0.616484s -MPI Rank 1: 08/16/2016 03:20:57: CNTKCommandTrainEnd: speechTrain -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:57: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:57: __COMPLETED__ -MPI Rank 1: ~MPIWrapper -MPI Rank 2: 08/16/2016 03:20:40: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 08/16/2016 03:20:40: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:20:40: Build info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:40: Built time: Aug 16 2016 03:09:16 -MPI Rank 2: 08/16/2016 03:20:40: Last modified date: Fri Aug 12 05:28:23 2016 -MPI Rank 2: 08/16/2016 03:20:40: Build type: Release -MPI Rank 2: 08/16/2016 03:20:40: Build target: GPU -MPI Rank 2: 08/16/2016 03:20:40: With 1bit-SGD: yes -MPI Rank 2: 08/16/2016 03:20:40: Math lib: mkl -MPI Rank 2: 08/16/2016 03:20:40: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 08/16/2016 03:20:40: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 08/16/2016 03:20:40: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 08/16/2016 03:20:40: Build Branch: HEAD -MPI Rank 2: 08/16/2016 03:20:40: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 2: 08/16/2016 03:20:40: Built by svcphil on Philly-Pool1 -MPI Rank 2: 08/16/2016 03:20:40: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 08/16/2016 03:20:40: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:20:43: GPU info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:20:43: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB -MPI Rank 2: 08/16/2016 03:20:43: ------------------------------------------------------------------- -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: Running on DPHAIM-24 at 2016/08/16 03:20:43 -MPI Rank 2: 08/16/2016 03:20:43: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=8 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:20:43: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "$DataDir$/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=8 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:20:43: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = 0 -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=8 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:deviceId=0 -MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=8 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 08/16/2016 03:20:43: Commands: speechTrain -MPI Rank 2: 08/16/2016 03:20:43: Precision = "double" -MPI Rank 2: 08/16/2016 03:20:43: Using 8 CPU threads. -MPI Rank 2: 08/16/2016 03:20:43: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031852.202534\Speech\DNN_ParallelBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 08/16/2016 03:20:43: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 08/16/2016 03:20:43: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: ############################################################################## -MPI Rank 2: 08/16/2016 03:20:43: # # -MPI Rank 2: 08/16/2016 03:20:43: # Action "train" # -MPI Rank 2: 08/16/2016 03:20:43: # # -MPI Rank 2: 08/16/2016 03:20:43: ############################################################################## -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:43: CNTKCommandTrainBegin: speechTrain -MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Creating virgin network. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: -MPI Rank 2: Post-processing network... -MPI Rank 2: -MPI Rank 2: 7 roots: -MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 2: EvalClassificationError = ClassificationError() -MPI Rank 2: InvStdOfFeatures = InvStdDev() -MPI Rank 2: MeanOfFeatures = Mean() -MPI Rank 2: PosteriorProb = Softmax() -MPI Rank 2: Prior = Mean() -MPI Rank 2: ScaledLogLikelihood = Minus() -MPI Rank 2: -MPI Rank 2: Validating network. 25 nodes to process in pass 1. -MPI Rank 2: -MPI Rank 2: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 2: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 2: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 2: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 2: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 2: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 2: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 2: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 2: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 2: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 2: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 2: -MPI Rank 2: Validating network. 17 nodes to process in pass 2. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Validating network, final pass. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 2: -MPI Rank 2: Post-processing network complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Created model with 25 nodes on GPU 0. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Training criterion node(s): -MPI Rank 2: 08/16/2016 03:20:44: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Evaluation criterion node(s): -MPI Rank 2: 08/16/2016 03:20:44: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:20:44: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:20:44: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 08/16/2016 03:20:44: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 08/16/2016 03:20:44: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 08/16/2016 03:20:44: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:44: MeanOfFeatures = Mean() -MPI Rank 2: 08/16/2016 03:20:44: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 08/16/2016 03:20:44: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:50: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:50: Starting minibatch loop. -MPI Rank 2: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1040s; samplesPerSecond = 6155.6 -MPI Rank 2: 08/16/2016 03:20:50: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.0991s; samplesPerSecond = 6456.2 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.0984s; samplesPerSecond = 6500.9 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0972s; samplesPerSecond = 6587.3 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.0920s; samplesPerSecond = 6953.0 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.0921s; samplesPerSecond = 6946.9 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.0988s; samplesPerSecond = 6476.8 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.0920s; samplesPerSecond = 6958.0 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0844s; samplesPerSecond = 7583.0 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.0780s; samplesPerSecond = 8202.9 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.0968s; samplesPerSecond = 6608.2 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1046s; samplesPerSecond = 6118.0 -MPI Rank 2: 08/16/2016 03:20:51: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.0919s; samplesPerSecond = 6960.6 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.0943s; samplesPerSecond = 6788.1 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0877s; samplesPerSecond = 7298.6 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.0951s; samplesPerSecond = 6726.9 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.0914s; samplesPerSecond = 7002.0 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0951s; samplesPerSecond = 6726.4 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.0921s; samplesPerSecond = 6952.3 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.0981s; samplesPerSecond = 6525.8 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1111s; samplesPerSecond = 5763.0 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0984s; samplesPerSecond = 6506.5 -MPI Rank 2: 08/16/2016 03:20:52: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0886s; samplesPerSecond = 7225.4 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.0954s; samplesPerSecond = 6710.3 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0975s; samplesPerSecond = 6562.5 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1008s; samplesPerSecond = 6349.5 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0910s; samplesPerSecond = 7033.9 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.0879s; samplesPerSecond = 7278.0 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.1004s; samplesPerSecond = 6372.2 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.0976s; samplesPerSecond = 6560.3 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0959s; samplesPerSecond = 6677.0 -MPI Rank 2: 08/16/2016 03:20:53: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0958s; samplesPerSecond = 6683.8 -MPI Rank 2: 08/16/2016 03:20:53: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.06077s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.035327 -MPI Rank 2: Async gradient aggregation wait time: 0.00284 -MPI Rank 2: Actual gradient aggregation time: 0.018497 -MPI Rank 2: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.22369214 * 2304; EvalClassificationError = 0.61111111 * 2304; time = 0.2022s; samplesPerSecond = 11394.7 -MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.01201 -MPI Rank 2: Async gradient aggregation wait time: 0.013216 -MPI Rank 2: Actual gradient aggregation time: 0.016512 -MPI Rank 2: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.23347641 * 2560; EvalClassificationError = 0.58320313 * 2560; time = 0.1741s; samplesPerSecond = 14705.9 -MPI Rank 2: Async gradient aggregation wait time: 0.010019 -MPI Rank 2: Actual gradient aggregation time: 0.016346 -MPI Rank 2: Async gradient aggregation wait time: 0.008387 -MPI Rank 2: Actual gradient aggregation time: 0.016248 -MPI Rank 2: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.16589380 * 2560; EvalClassificationError = 0.57617188 * 2560; time = 0.1733s; samplesPerSecond = 14771.6 -MPI Rank 2: Async gradient aggregation wait time: 0.00665 -MPI Rank 2: Actual gradient aggregation time: 0.017602 -MPI Rank 2: Async gradient aggregation wait time: 0.006178 -MPI Rank 2: Actual gradient aggregation time: 0.017222 -MPI Rank 2: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.17067204 * 2560; EvalClassificationError = 0.60664063 * 2560; time = 0.1735s; samplesPerSecond = 14758.8 -MPI Rank 2: Async gradient aggregation wait time: 0.004815 -MPI Rank 2: Actual gradient aggregation time: 0.016551 -MPI Rank 2: Async gradient aggregation wait time: 0.009325 -MPI Rank 2: Actual gradient aggregation time: 0.016836 -MPI Rank 2: 08/16/2016 03:20:54: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.18189249 * 2560; EvalClassificationError = 0.58945313 * 2560; time = 0.1709s; samplesPerSecond = 14982.6 -MPI Rank 2: Async gradient aggregation wait time: 0.005557 -MPI Rank 2: Actual gradient aggregation time: 0.016515 -MPI Rank 2: Async gradient aggregation wait time: 0.000651 -MPI Rank 2: Actual gradient aggregation time: 0.017232 -MPI Rank 2: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.08724350 * 2560; EvalClassificationError = 0.56562500 * 2560; time = 0.1717s; samplesPerSecond = 14910.2 -MPI Rank 2: Async gradient aggregation wait time: 0.004326 -MPI Rank 2: Actual gradient aggregation time: 0.016551 -MPI Rank 2: Async gradient aggregation wait time: 0.005588 -MPI Rank 2: Actual gradient aggregation time: 0.017189 -MPI Rank 2: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.09218108 * 2560; EvalClassificationError = 0.59179688 * 2560; time = 0.1735s; samplesPerSecond = 14752.5 -MPI Rank 2: Async gradient aggregation wait time: 0.004491 -MPI Rank 2: Actual gradient aggregation time: 0.016498 -MPI Rank 2: Async gradient aggregation wait time: 0.003218 -MPI Rank 2: Actual gradient aggregation time: 0.016326 -MPI Rank 2: 08/16/2016 03:20:55: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.10191157 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.1679s; samplesPerSecond = 15250.3 -MPI Rank 2: Async gradient aggregation wait time: 0.006218 -MPI Rank 2: Actual gradient aggregation time: 0.006778 -MPI Rank 2: 08/16/2016 03:20:55: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.15620068 * 20480; EvalClassificationError = 0.58857422 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.42687s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.016322 -MPI Rank 2: Async gradient aggregation wait time: 0.013477 -MPI Rank 2: Actual gradient aggregation time: 0.029454 -MPI Rank 2: 08/16/2016 03:20:55: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.11767394 * 9216; EvalClassificationError = 0.56510417 * 9216; time = 0.3412s; samplesPerSecond = 27012.3 -MPI Rank 2: Async gradient aggregation wait time: 0.004206 -MPI Rank 2: Actual gradient aggregation time: 0.03017 -MPI Rank 2: Async gradient aggregation wait time: 0.009311 -MPI Rank 2: Actual gradient aggregation time: 0.03402 -MPI Rank 2: 08/16/2016 03:20:56: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.08282316 * 10240; EvalClassificationError = 0.56914062 * 10240; time = 0.3164s; samplesPerSecond = 32364.4 -MPI Rank 2: 08/16/2016 03:20:56: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09667113 * 20480; EvalClassificationError = 0.56689453 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.677824s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.024966 -MPI Rank 2: Actual gradient aggregation time: 0.028835 -MPI Rank 2: Async gradient aggregation wait time: 0.002866 -MPI Rank 2: Actual gradient aggregation time: 0.027712 -MPI Rank 2: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.98353176 * 9216; EvalClassificationError = 0.53982205 * 9216; time = 0.3015s; samplesPerSecond = 30568.5 -MPI Rank 2: Async gradient aggregation wait time: 0.003038 -MPI Rank 2: Actual gradient aggregation time: 0.029717 -MPI Rank 2: Async gradient aggregation wait time: 0.009962 -MPI Rank 2: Actual gradient aggregation time: 0.028025 -MPI Rank 2: 08/16/2016 03:20:56: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96715780 * 10240; EvalClassificationError = 0.53369141 * 10240; time = 0.2965s; samplesPerSecond = 34531.1 -MPI Rank 2: Async gradient aggregation wait time: 0.006611 -MPI Rank 2: 08/16/2016 03:20:56: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.97591825 * 20480; EvalClassificationError = 0.53642578 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=0.616116s -MPI Rank 2: 08/16/2016 03:20:56: CNTKCommandTrainEnd: speechTrain -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:56: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:57: __COMPLETED__ -MPI Rank 2: ~MPIWrapper \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/run-test b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/run-test deleted file mode 100755 index 9198207df..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/run-test +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -. $TEST_ROOT_DIR/run-test-common - -OriginalTestDir=../../../DNN/ParallelBufferedAsyncGradientAggregation -ConfigDir=$TEST_DIR/../../../DNN -LogFileName=stderr -Instances=3 -NumCPUThreads=$(threadsPerInstance $Instances) - -(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) -if [ $? != 0 ]; then - echo Error: Baselines must match original test. Copy from $OriginalTestDir. - exit 1 -fi - -# cntkmpirun -cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=HTKDeserializers]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=1]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]]" -ExitCode=$? -sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0 -sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1 -sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2 -exit $ExitCode diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml deleted file mode 100644 index 2e61d7f55..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml +++ /dev/null @@ -1,39 +0,0 @@ -dataDir: ../../../Data -tags: - # - bvt-s (build_sku == 'gpu') and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu'))) - - nightly-s (build_sku == '1bitsgd') - -testCases: - Must train epochs in exactly same order and parameters for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting Epoch {{integer}} - - learning rate per sample = {{float}} - - momentum = {{float}} - - Epochs must be finished with expected results for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Finished Epoch[{{integer}} of {{integer}}] - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0.00000001}} - - learningRatePerSample = {{float,tolerance=0%}} - - Per-minibatch training results must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} - - " * {{integer}}; " - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0.00000001}} - - DataParallelSGD training parameters must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting minibatch loop - - DataParallelSGD training - - myRank = {{integer}} - - numNodes = 3 - - numGradientBits = 1 - - distributed reading is ENABLED - - BufferedAsyncGradientAggregation is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt deleted file mode 100644 index b691f0ccb..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt +++ /dev/null @@ -1,438 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3530 @ 2.80GHz - Hardware threads: 4 - Total Memory: 12580404 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (1) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (2) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (0) are in (participating) -MPI Rank 0: 12/15/2016 08:31:16: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 -MPI Rank 0: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -MPI Rank 0: 12/15/2016 08:31:16: Using 1 CPU threads. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: ############################################################################## -MPI Rank 0: 12/15/2016 08:31:16: # # -MPI Rank 0: 12/15/2016 08:31:16: # speechTrain command (train action) # -MPI Rank 0: 12/15/2016 08:31:16: # # -MPI Rank 0: 12/15/2016 08:31:16: ############################################################################## -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: -MPI Rank 0: Creating virgin network. -MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: 12/15/2016 08:31:16: -MPI Rank 0: Model has 25 nodes. Using CPU. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: 12/15/2016 08:31:16: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:16: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:16: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 12/15/2016 08:31:16: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 12/15/2016 08:31:16: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 12/15/2016 08:31:16: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:16: MeanOfFeatures = Mean() -MPI Rank 0: 12/15/2016 08:31:16: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 12/15/2016 08:31:16: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:19: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:20: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755209 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1225s; samplesPerSecond = 5225.1 -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610347 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1122s; samplesPerSecond = 5704.3 -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222493 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.1140s; samplesPerSecond = 5613.7 -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152761 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1134s; samplesPerSecond = 5643.9 -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818495 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.1116s; samplesPerSecond = 5737.0 -MPI Rank 0: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641133 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.1124s; samplesPerSecond = 5694.5 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802654 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.1131s; samplesPerSecond = 5657.7 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832811 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1120s; samplesPerSecond = 5716.2 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50627956 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1143s; samplesPerSecond = 5600.5 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478094 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.1109s; samplesPerSecond = 5770.2 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031055 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1135s; samplesPerSecond = 5638.0 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365293 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.1114s; samplesPerSecond = 5743.3 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20931888 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.1114s; samplesPerSecond = 5747.3 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460312 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.1083s; samplesPerSecond = 5911.6 -MPI Rank 0: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97528860 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.1105s; samplesPerSecond = 5791.6 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968648 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.1123s; samplesPerSecond = 5696.6 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84171867 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.1130s; samplesPerSecond = 5664.7 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031476 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1095s; samplesPerSecond = 5845.3 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83857843 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.1108s; samplesPerSecond = 5774.1 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632032 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.1098s; samplesPerSecond = 5831.0 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61032974 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.1097s; samplesPerSecond = 5831.9 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330475 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.1111s; samplesPerSecond = 5761.5 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591535 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.1095s; samplesPerSecond = 5843.4 -MPI Rank 0: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566229 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1103s; samplesPerSecond = 5803.4 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164700 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.1096s; samplesPerSecond = 5838.5 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954552 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.1142s; samplesPerSecond = 5605.4 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27033979 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.1105s; samplesPerSecond = 5792.2 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112142 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1101s; samplesPerSecond = 5810.8 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800742 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.1102s; samplesPerSecond = 5809.0 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783400 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.1086s; samplesPerSecond = 5891.6 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590123 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.1105s; samplesPerSecond = 5791.8 -MPI Rank 0: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415391 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.1092s; samplesPerSecond = 5860.6 -MPI Rank 0: 12/15/2016 08:31:23: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696796 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.58853s -MPI Rank 0: 12/15/2016 08:31:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:23: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14624175 * 2560; EvalClassificationError = 0.56953125 * 2560; time = 0.2418s; samplesPerSecond = 10587.8 -MPI Rank 0: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06174128 * 2560; EvalClassificationError = 0.55742187 * 2560; time = 0.2326s; samplesPerSecond = 11004.5 -MPI Rank 0: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04994338 * 2560; EvalClassificationError = 0.55351562 * 2560; time = 0.2293s; samplesPerSecond = 11166.7 -MPI Rank 0: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03695538 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2298s; samplesPerSecond = 11138.5 -MPI Rank 0: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.03086227 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.2347s; samplesPerSecond = 10907.4 -MPI Rank 0: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97306193 * 2560; EvalClassificationError = 0.53671875 * 2560; time = 0.2221s; samplesPerSecond = 11524.3 -MPI Rank 0: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.96746064 * 2560; EvalClassificationError = 0.53164062 * 2560; time = 0.2241s; samplesPerSecond = 11425.0 -MPI Rank 0: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.95498165 * 2560; EvalClassificationError = 0.53750000 * 2560; time = 0.2250s; samplesPerSecond = 11378.8 -MPI Rank 0: 12/15/2016 08:31:25: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.02765603 * 20480; EvalClassificationError = 0.55053711 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.84777s -MPI Rank 0: 12/15/2016 08:31:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:26: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95358449 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 0.7052s; samplesPerSecond = 14520.5 -MPI Rank 0: 12/15/2016 08:31:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97540911 * 10240; EvalClassificationError = 0.55253906 * 10240; time = 0.6838s; samplesPerSecond = 14975.6 -MPI Rank 0: 12/15/2016 08:31:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96449680 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.39589s -MPI Rank 0: 12/15/2016 08:31:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:27: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:27: __COMPLETED__ -MPI Rank 1: 12/15/2016 08:31:17: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 -MPI Rank 1: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -MPI Rank 1: 12/15/2016 08:31:17: Using 1 CPU threads. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: ############################################################################## -MPI Rank 1: 12/15/2016 08:31:17: # # -MPI Rank 1: 12/15/2016 08:31:17: # speechTrain command (train action) # -MPI Rank 1: 12/15/2016 08:31:17: # # -MPI Rank 1: 12/15/2016 08:31:17: ############################################################################## -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: -MPI Rank 1: Creating virgin network. -MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: 12/15/2016 08:31:17: -MPI Rank 1: Model has 25 nodes. Using CPU. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: 12/15/2016 08:31:17: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:17: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:17: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 12/15/2016 08:31:17: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 12/15/2016 08:31:17: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 12/15/2016 08:31:17: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:17: MeanOfFeatures = Mean() -MPI Rank 1: 12/15/2016 08:31:17: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 12/15/2016 08:31:17: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:19: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:20: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755209 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1266s; samplesPerSecond = 5054.1 -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610347 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1122s; samplesPerSecond = 5704.8 -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222493 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.1140s; samplesPerSecond = 5613.1 -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152761 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1135s; samplesPerSecond = 5641.1 -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818495 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.1115s; samplesPerSecond = 5738.9 -MPI Rank 1: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641133 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.1124s; samplesPerSecond = 5694.4 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802654 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.1131s; samplesPerSecond = 5658.0 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832811 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1120s; samplesPerSecond = 5716.0 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50627956 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1143s; samplesPerSecond = 5601.3 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478094 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.1109s; samplesPerSecond = 5770.2 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031055 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1135s; samplesPerSecond = 5637.7 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365293 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.1114s; samplesPerSecond = 5743.2 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20931888 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.1114s; samplesPerSecond = 5747.1 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460312 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.1083s; samplesPerSecond = 5910.5 -MPI Rank 1: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97528860 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.1105s; samplesPerSecond = 5791.5 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968648 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.1123s; samplesPerSecond = 5697.8 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84171867 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.1130s; samplesPerSecond = 5664.4 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031476 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1095s; samplesPerSecond = 5845.0 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83857843 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.1108s; samplesPerSecond = 5773.8 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632032 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.1097s; samplesPerSecond = 5832.7 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61032974 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.1098s; samplesPerSecond = 5829.6 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330475 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.1111s; samplesPerSecond = 5761.0 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591535 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.1096s; samplesPerSecond = 5841.4 -MPI Rank 1: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566229 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1103s; samplesPerSecond = 5800.9 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164700 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.1097s; samplesPerSecond = 5835.1 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954552 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.1141s; samplesPerSecond = 5607.5 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27033979 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.1105s; samplesPerSecond = 5792.1 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112142 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1102s; samplesPerSecond = 5809.7 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800742 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.1102s; samplesPerSecond = 5809.4 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783400 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.1086s; samplesPerSecond = 5891.6 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590123 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.1105s; samplesPerSecond = 5791.0 -MPI Rank 1: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415391 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.1092s; samplesPerSecond = 5860.4 -MPI Rank 1: 12/15/2016 08:31:23: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696796 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.59272s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:23: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14624175 * 2560; EvalClassificationError = 0.56953125 * 2560; time = 0.2419s; samplesPerSecond = 10583.8 -MPI Rank 1: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06174128 * 2560; EvalClassificationError = 0.55742187 * 2560; time = 0.2327s; samplesPerSecond = 11002.6 -MPI Rank 1: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04994338 * 2560; EvalClassificationError = 0.55351562 * 2560; time = 0.2293s; samplesPerSecond = 11164.1 -MPI Rank 1: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03695538 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2298s; samplesPerSecond = 11140.6 -MPI Rank 1: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.03086227 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.2347s; samplesPerSecond = 10907.3 -MPI Rank 1: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97306193 * 2560; EvalClassificationError = 0.53671875 * 2560; time = 0.2222s; samplesPerSecond = 11522.8 -MPI Rank 1: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.96746064 * 2560; EvalClassificationError = 0.53164062 * 2560; time = 0.2240s; samplesPerSecond = 11427.0 -MPI Rank 1: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.95498165 * 2560; EvalClassificationError = 0.53750000 * 2560; time = 0.2250s; samplesPerSecond = 11378.7 -MPI Rank 1: 12/15/2016 08:31:25: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.02765603 * 20480; EvalClassificationError = 0.55053711 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.84776s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:26: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95358449 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 0.7053s; samplesPerSecond = 14519.2 -MPI Rank 1: 12/15/2016 08:31:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97540911 * 10240; EvalClassificationError = 0.55253906 * 10240; time = 0.6838s; samplesPerSecond = 14975.4 -MPI Rank 1: 12/15/2016 08:31:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96449680 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.39588s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:27: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:27: __COMPLETED__ -MPI Rank 2: 12/15/2016 08:31:17: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:16 -MPI Rank 2: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_cpu/stderr -MPI Rank 2: 12/15/2016 08:31:17: Using 1 CPU threads. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: ############################################################################## -MPI Rank 2: 12/15/2016 08:31:17: # # -MPI Rank 2: 12/15/2016 08:31:17: # speechTrain command (train action) # -MPI Rank 2: 12/15/2016 08:31:17: # # -MPI Rank 2: 12/15/2016 08:31:17: ############################################################################## -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: -MPI Rank 2: Creating virgin network. -MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: 12/15/2016 08:31:17: -MPI Rank 2: Model has 25 nodes. Using CPU. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: 12/15/2016 08:31:17: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:17: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:17: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 12/15/2016 08:31:17: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 12/15/2016 08:31:17: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 12/15/2016 08:31:17: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:17: MeanOfFeatures = Mean() -MPI Rank 2: 12/15/2016 08:31:17: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 12/15/2016 08:31:17: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:20: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:20: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:20: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755209 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1259s; samplesPerSecond = 5082.3 -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610347 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1121s; samplesPerSecond = 5707.5 -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222493 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.1140s; samplesPerSecond = 5613.8 -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152761 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1133s; samplesPerSecond = 5647.3 -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818495 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.1115s; samplesPerSecond = 5739.4 -MPI Rank 2: 12/15/2016 08:31:20: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641133 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.1124s; samplesPerSecond = 5694.7 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802654 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.1131s; samplesPerSecond = 5659.7 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832811 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1119s; samplesPerSecond = 5718.0 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50627956 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1142s; samplesPerSecond = 5601.8 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478094 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.1109s; samplesPerSecond = 5770.7 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031055 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.1135s; samplesPerSecond = 5641.1 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365293 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.1114s; samplesPerSecond = 5744.4 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20931888 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.1113s; samplesPerSecond = 5748.1 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460312 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.1082s; samplesPerSecond = 5914.6 -MPI Rank 2: 12/15/2016 08:31:21: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97528860 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.1105s; samplesPerSecond = 5792.4 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968648 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.1123s; samplesPerSecond = 5699.5 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84171867 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.1130s; samplesPerSecond = 5665.8 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031476 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1094s; samplesPerSecond = 5850.0 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83857843 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.1108s; samplesPerSecond = 5775.1 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632032 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.1097s; samplesPerSecond = 5834.4 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61032974 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.1098s; samplesPerSecond = 5831.3 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330475 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.1110s; samplesPerSecond = 5763.8 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591535 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.1096s; samplesPerSecond = 5841.9 -MPI Rank 2: 12/15/2016 08:31:22: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566229 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1103s; samplesPerSecond = 5803.0 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164700 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.1096s; samplesPerSecond = 5837.8 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954552 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.1141s; samplesPerSecond = 5609.5 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27033979 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.1105s; samplesPerSecond = 5793.8 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112142 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1102s; samplesPerSecond = 5810.0 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800742 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.1101s; samplesPerSecond = 5812.0 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783400 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.1086s; samplesPerSecond = 5892.2 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590123 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.1105s; samplesPerSecond = 5793.1 -MPI Rank 2: 12/15/2016 08:31:23: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415391 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.1091s; samplesPerSecond = 5863.6 -MPI Rank 2: 12/15/2016 08:31:23: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 3.04696796 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=3.59201s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:23: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.14624175 * 2560; EvalClassificationError = 0.56953125 * 2560; time = 0.2319s; samplesPerSecond = 11037.6 -MPI Rank 2: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06174128 * 2560; EvalClassificationError = 0.55742187 * 2560; time = 0.2326s; samplesPerSecond = 11006.3 -MPI Rank 2: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04994338 * 2560; EvalClassificationError = 0.55351562 * 2560; time = 0.2292s; samplesPerSecond = 11167.0 -MPI Rank 2: 12/15/2016 08:31:24: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.03695538 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2298s; samplesPerSecond = 11140.3 -MPI Rank 2: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.03086227 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.2347s; samplesPerSecond = 10908.7 -MPI Rank 2: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.97306193 * 2560; EvalClassificationError = 0.53671875 * 2560; time = 0.2221s; samplesPerSecond = 11524.1 -MPI Rank 2: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.96746064 * 2560; EvalClassificationError = 0.53164062 * 2560; time = 0.2240s; samplesPerSecond = 11428.0 -MPI Rank 2: 12/15/2016 08:31:25: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.95498165 * 2560; EvalClassificationError = 0.53750000 * 2560; time = 0.2250s; samplesPerSecond = 11377.8 -MPI Rank 2: 12/15/2016 08:31:25: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 2.02765603 * 20480; EvalClassificationError = 0.55053711 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.83772s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:25: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:26: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95358449 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 0.6863s; samplesPerSecond = 14920.6 -MPI Rank 2: 12/15/2016 08:31:27: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97540911 * 10240; EvalClassificationError = 0.55253906 * 10240; time = 0.6837s; samplesPerSecond = 14977.5 -MPI Rank 2: 12/15/2016 08:31:27: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96449680 * 20480; EvalClassificationError = 0.54428711 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.37654s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:27: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:27: __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt deleted file mode 100644 index 1208b7bfc..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt +++ /dev/null @@ -1,438 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3530 @ 2.80GHz - Hardware threads: 4 - Total Memory: 12580404 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (1) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (2) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (0) are in (participating) -MPI Rank 0: 12/15/2016 08:31:32: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 -MPI Rank 0: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -MPI Rank 0: 12/15/2016 08:31:32: Using 1 CPU threads. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: ############################################################################## -MPI Rank 0: 12/15/2016 08:31:32: # # -MPI Rank 0: 12/15/2016 08:31:32: # speechTrain command (train action) # -MPI Rank 0: 12/15/2016 08:31:32: # # -MPI Rank 0: 12/15/2016 08:31:32: ############################################################################## -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: -MPI Rank 0: Creating virgin network. -MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: 12/15/2016 08:31:32: -MPI Rank 0: Model has 25 nodes. Using GPU 0. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: 12/15/2016 08:31:32: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:32: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:32: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 12/15/2016 08:31:32: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 12/15/2016 08:31:32: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 12/15/2016 08:31:32: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:32: MeanOfFeatures = Mean() -MPI Rank 0: 12/15/2016 08:31:32: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 12/15/2016 08:31:32: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:36: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:37: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638635 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1094s; samplesPerSecond = 5850.0 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517799 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0937s; samplesPerSecond = 6827.8 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0945s; samplesPerSecond = 6774.2 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73673607 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0929s; samplesPerSecond = 6886.2 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84021876 * 640; EvalClassificationError = 0.86406250 * 640; time = 0.0889s; samplesPerSecond = 7201.9 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.69831379 * 640; EvalClassificationError = 0.86250000 * 640; time = 0.0903s; samplesPerSecond = 7090.6 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.39593109 * 640; EvalClassificationError = 0.77031250 * 640; time = 0.0864s; samplesPerSecond = 7406.6 -MPI Rank 0: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49749690 * 640; EvalClassificationError = 0.82968750 * 640; time = 0.0897s; samplesPerSecond = 7133.4 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.47295705 * 640; EvalClassificationError = 0.81093750 * 640; time = 0.0892s; samplesPerSecond = 7170.9 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36483701 * 640; EvalClassificationError = 0.79843750 * 640; time = 0.0895s; samplesPerSecond = 7152.0 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.46790699 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0918s; samplesPerSecond = 6971.0 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.22104761 * 640; EvalClassificationError = 0.75625000 * 640; time = 0.0919s; samplesPerSecond = 6963.3 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.12504348 * 640; EvalClassificationError = 0.75312500 * 640; time = 0.0914s; samplesPerSecond = 7004.8 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 2.99508080 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0911s; samplesPerSecond = 7024.9 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.89602893 * 640; EvalClassificationError = 0.70000000 * 640; time = 0.0908s; samplesPerSecond = 7050.7 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.04740234 * 640; EvalClassificationError = 0.74218750 * 640; time = 0.0922s; samplesPerSecond = 6944.6 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.75064617 * 640; EvalClassificationError = 0.69375000 * 640; time = 0.0884s; samplesPerSecond = 7239.7 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.65538372 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0895s; samplesPerSecond = 7148.4 -MPI Rank 0: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.74816083 * 640; EvalClassificationError = 0.69062500 * 640; time = 0.0881s; samplesPerSecond = 7261.0 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.68736721 * 640; EvalClassificationError = 0.68593750 * 640; time = 0.0911s; samplesPerSecond = 7028.6 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53268728 * 640; EvalClassificationError = 0.64375000 * 640; time = 0.0901s; samplesPerSecond = 7099.4 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.53923335 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0903s; samplesPerSecond = 7090.4 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48909476 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0900s; samplesPerSecond = 7114.0 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.50033044 * 640; EvalClassificationError = 0.65156250 * 640; time = 0.0882s; samplesPerSecond = 7256.9 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43569647 * 640; EvalClassificationError = 0.63125000 * 640; time = 0.0899s; samplesPerSecond = 7116.3 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.34293090 * 640; EvalClassificationError = 0.61562500 * 640; time = 0.0888s; samplesPerSecond = 7208.4 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.20428060 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0909s; samplesPerSecond = 7044.3 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.46886817 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0901s; samplesPerSecond = 7101.4 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.22066720 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0897s; samplesPerSecond = 7131.5 -MPI Rank 0: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21784279 * 640; EvalClassificationError = 0.60781250 * 640; time = 0.0910s; samplesPerSecond = 7036.7 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.20442240 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0897s; samplesPerSecond = 7133.3 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18215676 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.0910s; samplesPerSecond = 7030.7 -MPI Rank 0: 12/15/2016 08:31:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99321241 * 20480; EvalClassificationError = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.92872s -MPI Rank 0: 12/15/2016 08:31:40: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:40: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889867 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1253s; samplesPerSecond = 20437.7 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776227 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.1002s; samplesPerSecond = 25542.3 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260187 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0969s; samplesPerSecond = 26424.2 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.98459924 * 2560; EvalClassificationError = 0.54648438 * 2560; time = 0.0954s; samplesPerSecond = 26820.9 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97206449 * 2560; EvalClassificationError = 0.53984375 * 2560; time = 0.0955s; samplesPerSecond = 26809.9 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91865552 * 2560; EvalClassificationError = 0.52109375 * 2560; time = 0.0978s; samplesPerSecond = 26175.9 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91066643 * 2560; EvalClassificationError = 0.52148438 * 2560; time = 0.0953s; samplesPerSecond = 26854.1 -MPI Rank 0: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.89501440 * 2560; EvalClassificationError = 0.51992187 * 2560; time = 0.0961s; samplesPerSecond = 26633.4 -MPI Rank 0: 12/15/2016 08:31:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.97128286 * 20480; EvalClassificationError = 0.53715820 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.812502s -MPI Rank 0: 12/15/2016 08:31:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:41: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:41: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 0: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820598 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1751s; samplesPerSecond = 58495.2 -MPI Rank 0: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958079 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1351s; samplesPerSecond = 75799.6 -MPI Rank 0: 12/15/2016 08:31:41: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889339 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.319187s -MPI Rank 0: 12/15/2016 08:31:41: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:41: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:41: __COMPLETED__ -MPI Rank 1: 12/15/2016 08:31:32: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 -MPI Rank 1: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -MPI Rank 1: 12/15/2016 08:31:32: Using 1 CPU threads. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:32: ############################################################################## -MPI Rank 1: 12/15/2016 08:31:32: # # -MPI Rank 1: 12/15/2016 08:31:32: # speechTrain command (train action) # -MPI Rank 1: 12/15/2016 08:31:32: # # -MPI Rank 1: 12/15/2016 08:31:32: ############################################################################## -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:32: -MPI Rank 1: Creating virgin network. -MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: 12/15/2016 08:31:33: -MPI Rank 1: Model has 25 nodes. Using GPU 0. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:33: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: 12/15/2016 08:31:33: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:33: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:33: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:33: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:33: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 12/15/2016 08:31:33: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 12/15/2016 08:31:33: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 12/15/2016 08:31:33: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:33: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:33: MeanOfFeatures = Mean() -MPI Rank 1: 12/15/2016 08:31:33: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 12/15/2016 08:31:33: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:36: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:37: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638635 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1091s; samplesPerSecond = 5868.8 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517799 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0950s; samplesPerSecond = 6739.8 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0937s; samplesPerSecond = 6832.1 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73673607 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0930s; samplesPerSecond = 6879.0 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84021876 * 640; EvalClassificationError = 0.86406250 * 640; time = 0.0888s; samplesPerSecond = 7206.4 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.69831379 * 640; EvalClassificationError = 0.86250000 * 640; time = 0.0903s; samplesPerSecond = 7089.0 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.39593109 * 640; EvalClassificationError = 0.77031250 * 640; time = 0.0863s; samplesPerSecond = 7411.8 -MPI Rank 1: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49749690 * 640; EvalClassificationError = 0.82968750 * 640; time = 0.0894s; samplesPerSecond = 7157.7 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.47295705 * 640; EvalClassificationError = 0.81093750 * 640; time = 0.0901s; samplesPerSecond = 7104.1 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36483701 * 640; EvalClassificationError = 0.79843750 * 640; time = 0.0894s; samplesPerSecond = 7155.7 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.46790699 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0906s; samplesPerSecond = 7065.7 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.22104761 * 640; EvalClassificationError = 0.75625000 * 640; time = 0.0931s; samplesPerSecond = 6872.9 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.12504348 * 640; EvalClassificationError = 0.75312500 * 640; time = 0.0906s; samplesPerSecond = 7066.6 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 2.99508080 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0907s; samplesPerSecond = 7052.8 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.89602893 * 640; EvalClassificationError = 0.70000000 * 640; time = 0.0923s; samplesPerSecond = 6932.6 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.04740234 * 640; EvalClassificationError = 0.74218750 * 640; time = 0.0911s; samplesPerSecond = 7025.9 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.75064617 * 640; EvalClassificationError = 0.69375000 * 640; time = 0.0880s; samplesPerSecond = 7271.8 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.65538372 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0899s; samplesPerSecond = 7120.0 -MPI Rank 1: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.74816083 * 640; EvalClassificationError = 0.69062500 * 640; time = 0.0877s; samplesPerSecond = 7298.0 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.68736721 * 640; EvalClassificationError = 0.68593750 * 640; time = 0.0927s; samplesPerSecond = 6906.4 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53268728 * 640; EvalClassificationError = 0.64375000 * 640; time = 0.0885s; samplesPerSecond = 7228.0 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.53923335 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0902s; samplesPerSecond = 7092.0 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48909476 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0912s; samplesPerSecond = 7019.3 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.50033044 * 640; EvalClassificationError = 0.65156250 * 640; time = 0.0874s; samplesPerSecond = 7325.8 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43569647 * 640; EvalClassificationError = 0.63125000 * 640; time = 0.0912s; samplesPerSecond = 7019.3 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.34293090 * 640; EvalClassificationError = 0.61562500 * 640; time = 0.0884s; samplesPerSecond = 7240.9 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.20428060 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0897s; samplesPerSecond = 7137.8 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.46886817 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0913s; samplesPerSecond = 7010.4 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.22066720 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0902s; samplesPerSecond = 7096.1 -MPI Rank 1: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21784279 * 640; EvalClassificationError = 0.60781250 * 640; time = 0.0910s; samplesPerSecond = 7034.0 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.20442240 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0898s; samplesPerSecond = 7130.7 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18215676 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.0894s; samplesPerSecond = 7157.3 -MPI Rank 1: 12/15/2016 08:31:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99321241 * 20480; EvalClassificationError = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.9298s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:40: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889867 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1257s; samplesPerSecond = 20373.6 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776227 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.1007s; samplesPerSecond = 25418.5 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260187 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0953s; samplesPerSecond = 26873.8 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.98459924 * 2560; EvalClassificationError = 0.54648438 * 2560; time = 0.0954s; samplesPerSecond = 26825.4 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97206449 * 2560; EvalClassificationError = 0.53984375 * 2560; time = 0.0955s; samplesPerSecond = 26815.5 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91865552 * 2560; EvalClassificationError = 0.52109375 * 2560; time = 0.0990s; samplesPerSecond = 25855.5 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91066643 * 2560; EvalClassificationError = 0.52148438 * 2560; time = 0.0941s; samplesPerSecond = 27206.3 -MPI Rank 1: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.89501440 * 2560; EvalClassificationError = 0.51992187 * 2560; time = 0.0976s; samplesPerSecond = 26220.6 -MPI Rank 1: 12/15/2016 08:31:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.97128286 * 20480; EvalClassificationError = 0.53715820 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.813236s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:41: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:41: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 1: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820598 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1746s; samplesPerSecond = 58641.3 -MPI Rank 1: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958079 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1351s; samplesPerSecond = 75822.1 -MPI Rank 1: 12/15/2016 08:31:41: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889339 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.318744s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:41: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:41: __COMPLETED__ -MPI Rank 2: 12/15/2016 08:31:33: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:31 -MPI Rank 2: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu DeviceId=0 timestamping=true numCPUThreads=1 stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantization@release_gpu/stderr -MPI Rank 2: 12/15/2016 08:31:33: Using 1 CPU threads. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: ############################################################################## -MPI Rank 2: 12/15/2016 08:31:33: # # -MPI Rank 2: 12/15/2016 08:31:33: # speechTrain command (train action) # -MPI Rank 2: 12/15/2016 08:31:33: # # -MPI Rank 2: 12/15/2016 08:31:33: ############################################################################## -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: -MPI Rank 2: Creating virgin network. -MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: 12/15/2016 08:31:33: -MPI Rank 2: Model has 25 nodes. Using GPU 0. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: 12/15/2016 08:31:33: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:33: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:33: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 12/15/2016 08:31:33: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 12/15/2016 08:31:33: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 12/15/2016 08:31:33: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: Initializing dataParallelSGD with FP32 aggregation. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:33: MeanOfFeatures = Mean() -MPI Rank 2: 12/15/2016 08:31:33: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 12/15/2016 08:31:33: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:37: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:37: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638635 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0989s; samplesPerSecond = 6468.4 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517799 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0938s; samplesPerSecond = 6825.4 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0941s; samplesPerSecond = 6803.7 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73673607 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.0930s; samplesPerSecond = 6884.7 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.84021876 * 640; EvalClassificationError = 0.86406250 * 640; time = 0.0888s; samplesPerSecond = 7204.3 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.69831379 * 640; EvalClassificationError = 0.86250000 * 640; time = 0.0915s; samplesPerSecond = 6996.5 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.39593109 * 640; EvalClassificationError = 0.77031250 * 640; time = 0.0864s; samplesPerSecond = 7409.8 -MPI Rank 2: 12/15/2016 08:31:37: Epoch[ 1 of 3]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.49749690 * 640; EvalClassificationError = 0.82968750 * 640; time = 0.0891s; samplesPerSecond = 7181.5 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.47295705 * 640; EvalClassificationError = 0.81093750 * 640; time = 0.0896s; samplesPerSecond = 7139.6 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.36483701 * 640; EvalClassificationError = 0.79843750 * 640; time = 0.0894s; samplesPerSecond = 7158.1 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.46790699 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.0918s; samplesPerSecond = 6973.9 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.22104761 * 640; EvalClassificationError = 0.75625000 * 640; time = 0.0920s; samplesPerSecond = 6959.7 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.12504348 * 640; EvalClassificationError = 0.75312500 * 640; time = 0.0909s; samplesPerSecond = 7038.5 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 2.99508080 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.0915s; samplesPerSecond = 6992.9 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.89602893 * 640; EvalClassificationError = 0.70000000 * 640; time = 0.0915s; samplesPerSecond = 6993.1 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.04740234 * 640; EvalClassificationError = 0.74218750 * 640; time = 0.0911s; samplesPerSecond = 7025.1 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.75064617 * 640; EvalClassificationError = 0.69375000 * 640; time = 0.0888s; samplesPerSecond = 7209.2 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.65538372 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0904s; samplesPerSecond = 7082.5 -MPI Rank 2: 12/15/2016 08:31:38: Epoch[ 1 of 3]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.74816083 * 640; EvalClassificationError = 0.69062500 * 640; time = 0.0873s; samplesPerSecond = 7330.5 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.68736721 * 640; EvalClassificationError = 0.68593750 * 640; time = 0.0919s; samplesPerSecond = 6965.8 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.53268728 * 640; EvalClassificationError = 0.64375000 * 640; time = 0.0894s; samplesPerSecond = 7162.0 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.53923335 * 640; EvalClassificationError = 0.63750000 * 640; time = 0.0903s; samplesPerSecond = 7089.6 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.48909476 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.0900s; samplesPerSecond = 7112.1 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.50033044 * 640; EvalClassificationError = 0.65156250 * 640; time = 0.0878s; samplesPerSecond = 7287.1 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.43569647 * 640; EvalClassificationError = 0.63125000 * 640; time = 0.0910s; samplesPerSecond = 7030.7 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.34293090 * 640; EvalClassificationError = 0.61562500 * 640; time = 0.0892s; samplesPerSecond = 7171.9 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.20428060 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0896s; samplesPerSecond = 7139.4 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.46886817 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.0901s; samplesPerSecond = 7102.0 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.22066720 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.0905s; samplesPerSecond = 7068.1 -MPI Rank 2: 12/15/2016 08:31:39: Epoch[ 1 of 3]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.21784279 * 640; EvalClassificationError = 0.60781250 * 640; time = 0.0910s; samplesPerSecond = 7034.0 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.20442240 * 640; EvalClassificationError = 0.57812500 * 640; time = 0.0898s; samplesPerSecond = 7130.6 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 1 of 3]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.18215676 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.0902s; samplesPerSecond = 7092.1 -MPI Rank 2: 12/15/2016 08:31:40: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 2.99321241 * 20480; EvalClassificationError = 0.72216797 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=2.92432s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:40: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:40: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889867 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1131s; samplesPerSecond = 22628.0 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776227 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.1011s; samplesPerSecond = 25323.2 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260187 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0961s; samplesPerSecond = 26645.6 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 1.98459924 * 2560; EvalClassificationError = 0.54648438 * 2560; time = 0.0955s; samplesPerSecond = 26819.8 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 1.97206449 * 2560; EvalClassificationError = 0.53984375 * 2560; time = 0.0955s; samplesPerSecond = 26808.8 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 1.91865552 * 2560; EvalClassificationError = 0.52109375 * 2560; time = 0.0978s; samplesPerSecond = 26169.4 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 1.91066643 * 2560; EvalClassificationError = 0.52148438 * 2560; time = 0.0953s; samplesPerSecond = 26865.9 -MPI Rank 2: 12/15/2016 08:31:40: Epoch[ 2 of 3]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 1.89501440 * 2560; EvalClassificationError = 0.51992187 * 2560; time = 0.0968s; samplesPerSecond = 26434.8 -MPI Rank 2: 12/15/2016 08:31:40: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 1.97128286 * 20480; EvalClassificationError = 0.53715820 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=0.811981s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:41: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:41: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. -MPI Rank 2: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820598 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1679s; samplesPerSecond = 60975.6 -MPI Rank 2: 12/15/2016 08:31:41: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958079 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1350s; samplesPerSecond = 75841.2 -MPI Rank 2: 12/15/2016 08:31:41: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889339 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.317066s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:41: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:41: __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/run-test b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/run-test deleted file mode 100755 index 31d51acfc..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/run-test +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -. $TEST_ROOT_DIR/run-test-common - -OriginalTestDir=../../../DNN/ParallelNoQuantization -ConfigDir=$TEST_DIR/../../../DNN -LogFileName=stderr -Instances=3 -NumCPUThreads=$(threadsPerInstance $Instances) - -(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) -if [ $? != 0 ]; then - echo Error: Baselines must match original test. Copy from $OriginalTestDir. - exit 1 -fi - -# cntkmpirun -cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=HTKDeserializers]] numCPUThreads=$NumCPUThreads" -ExitCode=$? -sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0 -sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1 -sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2 -exit $ExitCode diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml deleted file mode 100644 index ceb0104ce..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml +++ /dev/null @@ -1,40 +0,0 @@ -dataDir: ../../../Data -tags: - # running on every BVT job in 'P' (Speech) leg in Debug-GPU and Release-CPU configurations: - # - bvt-p ((build_sku == 'gpu') or (build_sku == '1bitsgd')) and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu'))) - # running unconditionally on every Nightly job in 'P' leg - - nightly-p ((build_sku == 'gpu') or (build_sku == '1bitsgd')) - -testCases: - Must train epochs in exactly same order and parameters for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting Epoch {{integer}} - - learning rate per sample = {{float}} - - momentum = {{float}} - - Epochs must be finished with expected results for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Finished Epoch[{{integer}} of {{integer}}] - - CrossEntropyWithSoftmax = {{float,tolerance=0.01%}} - - EvalClassificationError = {{float,tolerance=0.01%}} - - learningRatePerSample = {{float,tolerance=0.001%}} - - Per-minibatch training results must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} - - " * {{integer}}; " - - CrossEntropyWithSoftmax = {{float,tolerance=0.01%}} - - EvalClassificationError = {{float,tolerance=0.01%}} - - DataParallelSGD training parameters must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting minibatch loop - - DataParallelSGD training - - myRank = {{integer}} - - numNodes = 3 - - numGradientBits = 32 - - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt deleted file mode 100644 index b6a0e5115..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt +++ /dev/null @@ -1,613 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3530 @ 2.80GHz - Hardware threads: 4 - Total Memory: 12580404 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:46 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (1) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:45 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (2) are in (participating) -CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:46 - -C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes on a single host (3 requested); we (0) are in (participating) -MPI Rank 0: 12/15/2016 08:31:46: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank0 -MPI Rank 0: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:46 -MPI Rank 0: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 0: 12/15/2016 08:31:46: Using 1 CPU threads. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: ############################################################################## -MPI Rank 0: 12/15/2016 08:31:46: # # -MPI Rank 0: 12/15/2016 08:31:46: # speechTrain command (train action) # -MPI Rank 0: 12/15/2016 08:31:46: # # -MPI Rank 0: 12/15/2016 08:31:46: ############################################################################## -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: -MPI Rank 0: Creating virgin network. -MPI Rank 0: SimpleNetworkBuilder Using CPU -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: 12/15/2016 08:31:46: -MPI Rank 0: Model has 25 nodes. Using CPU. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: 12/15/2016 08:31:46: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:46: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 12/15/2016 08:31:46: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 12/15/2016 08:31:46: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 12/15/2016 08:31:46: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 12/15/2016 08:31:46: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: Initializing dataParallelSGD with FP64 aggregation. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:46: MeanOfFeatures = Mean() -MPI Rank 0: 12/15/2016 08:31:46: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 12/15/2016 08:31:46: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:49: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:50: Starting minibatch loop. -MPI Rank 0: 12/15/2016 08:31:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.3011s; samplesPerSecond = 2125.9 -MPI Rank 0: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2555s; samplesPerSecond = 2505.3 -MPI Rank 0: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2622s; samplesPerSecond = 2441.2 -MPI Rank 0: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2643s; samplesPerSecond = 2421.2 -MPI Rank 0: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2635s; samplesPerSecond = 2428.9 -MPI Rank 0: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2628s; samplesPerSecond = 2435.1 -MPI Rank 0: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2533s; samplesPerSecond = 2526.9 -MPI Rank 0: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2528s; samplesPerSecond = 2532.0 -MPI Rank 0: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2544s; samplesPerSecond = 2516.2 -MPI Rank 0: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2536s; samplesPerSecond = 2523.9 -MPI Rank 0: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2549s; samplesPerSecond = 2510.6 -MPI Rank 0: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2537s; samplesPerSecond = 2522.5 -MPI Rank 0: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2479s; samplesPerSecond = 2581.4 -MPI Rank 0: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2515s; samplesPerSecond = 2544.8 -MPI Rank 0: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2818s; samplesPerSecond = 2270.8 -MPI Rank 0: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2569s; samplesPerSecond = 2491.5 -MPI Rank 0: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2631s; samplesPerSecond = 2432.4 -MPI Rank 0: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2709s; samplesPerSecond = 2362.6 -MPI Rank 0: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2640s; samplesPerSecond = 2424.3 -MPI Rank 0: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2571s; samplesPerSecond = 2489.6 -MPI Rank 0: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2532s; samplesPerSecond = 2527.7 -MPI Rank 0: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2541s; samplesPerSecond = 2518.4 -MPI Rank 0: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2543s; samplesPerSecond = 2516.2 -MPI Rank 0: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2625s; samplesPerSecond = 2437.8 -MPI Rank 0: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2599s; samplesPerSecond = 2462.6 -MPI Rank 0: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2540s; samplesPerSecond = 2519.3 -MPI Rank 0: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2558s; samplesPerSecond = 2502.1 -MPI Rank 0: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2505s; samplesPerSecond = 2555.2 -MPI Rank 0: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2525s; samplesPerSecond = 2534.8 -MPI Rank 0: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2509s; samplesPerSecond = 2550.6 -MPI Rank 0: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2521s; samplesPerSecond = 2538.7 -MPI Rank 0: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2508s; samplesPerSecond = 2551.9 -MPI Rank 0: 12/15/2016 08:31:58: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.28871s -MPI Rank 0: 12/15/2016 08:31:58: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:58: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:31:58: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.029024 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.011366 -MPI Rank 0: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19109241 * 2304; EvalClassificationError = 0.58246528 * 2304; time = 0.4068s; samplesPerSecond = 5663.4 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.009713 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007983 -MPI Rank 0: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.20697464 * 2560; EvalClassificationError = 0.59453125 * 2560; time = 0.4134s; samplesPerSecond = 6193.0 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008095 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.009539 -MPI Rank 0: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23618716 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.4040s; samplesPerSecond = 6336.0 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007831 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007867 -MPI Rank 0: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.21810382 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.4054s; samplesPerSecond = 6314.1 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007854 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007794 -MPI Rank 0: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17778205 * 2560; EvalClassificationError = 0.59414062 * 2560; time = 0.3964s; samplesPerSecond = 6458.2 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.013175 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.012979 -MPI Rank 0: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13452559 * 2560; EvalClassificationError = 0.57734375 * 2560; time = 0.3897s; samplesPerSecond = 6568.9 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007794 -MPI Rank 0: Async gradient aggregation wait time: 1e-006 -MPI Rank 0: Actual gradient aggregation time: 0.007836 -MPI Rank 0: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.13087789 * 2560; EvalClassificationError = 0.57265625 * 2560; time = 0.3849s; samplesPerSecond = 6650.9 -MPI Rank 0: Async gradient aggregation wait time: 1e-006 -MPI Rank 0: Actual gradient aggregation time: 0.011382 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.008394 -MPI Rank 0: 12/15/2016 08:32:02: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.11200101 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.3899s; samplesPerSecond = 6566.0 -MPI Rank 0: Async gradient aggregation wait time: 0.00391 -MPI Rank 0: Actual gradient aggregation time: 0.006681 -MPI Rank 0: 12/15/2016 08:32:02: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17402050 * 20480; EvalClassificationError = 0.58750000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.21196s -MPI Rank 0: 12/15/2016 08:32:02: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:02: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.00922 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.009044 -MPI Rank 0: 12/15/2016 08:32:03: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.15723941 * 9216; EvalClassificationError = 0.56488715 * 9216; time = 1.3161s; samplesPerSecond = 7002.6 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.022458 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.016563 -MPI Rank 0: 12/15/2016 08:32:04: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02453665 * 10240; EvalClassificationError = 0.55771484 * 10240; time = 1.2215s; samplesPerSecond = 8383.3 -MPI Rank 0: 12/15/2016 08:32:04: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.08437881 * 20480; EvalClassificationError = 0.56079102 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.55852s -MPI Rank 0: 12/15/2016 08:32:04: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:04: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:04: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.012264 -MPI Rank 0: Async gradient aggregation wait time: 2e-006 -MPI Rank 0: Actual gradient aggregation time: 0.037037 -MPI Rank 0: 12/15/2016 08:32:05: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96502938 * 9216; EvalClassificationError = 0.53190104 * 9216; time = 1.2404s; samplesPerSecond = 7430.0 -MPI Rank 0: Async gradient aggregation wait time: 4e-006 -MPI Rank 0: Actual gradient aggregation time: 0.048557 -MPI Rank 0: Async gradient aggregation wait time: 3e-006 -MPI Rank 0: Actual gradient aggregation time: 0.135479 -MPI Rank 0: 12/15/2016 08:32:07: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95947098 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 1.1937s; samplesPerSecond = 8578.4 -MPI Rank 0: Async gradient aggregation wait time: 0.008052 -MPI Rank 0: 12/15/2016 08:32:07: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.96369079 * 20480; EvalClassificationError = 0.53471680 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.5704s -MPI Rank 0: 12/15/2016 08:32:07: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/models/cntkSpeech.dnn' -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:07: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 12/15/2016 08:32:07: __COMPLETED__ -MPI Rank 1: 12/15/2016 08:31:46: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank1 -MPI Rank 1: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:46 -MPI Rank 1: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 1: 12/15/2016 08:31:46: Using 1 CPU threads. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:46: ############################################################################## -MPI Rank 1: 12/15/2016 08:31:46: # # -MPI Rank 1: 12/15/2016 08:31:46: # speechTrain command (train action) # -MPI Rank 1: 12/15/2016 08:31:46: # # -MPI Rank 1: 12/15/2016 08:31:46: ############################################################################## -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:46: -MPI Rank 1: Creating virgin network. -MPI Rank 1: SimpleNetworkBuilder Using CPU -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: 12/15/2016 08:31:47: -MPI Rank 1: Model has 25 nodes. Using CPU. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:47: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: 12/15/2016 08:31:47: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:47: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:47: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:47: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 12/15/2016 08:31:47: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 12/15/2016 08:31:47: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 12/15/2016 08:31:47: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 12/15/2016 08:31:47: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: Initializing dataParallelSGD with FP64 aggregation. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:47: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:47: MeanOfFeatures = Mean() -MPI Rank 1: 12/15/2016 08:31:47: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 12/15/2016 08:31:47: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:49: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:50: Starting minibatch loop. -MPI Rank 1: 12/15/2016 08:31:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2852s; samplesPerSecond = 2244.1 -MPI Rank 1: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2670s; samplesPerSecond = 2396.8 -MPI Rank 1: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2622s; samplesPerSecond = 2440.9 -MPI Rank 1: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2643s; samplesPerSecond = 2421.9 -MPI Rank 1: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2647s; samplesPerSecond = 2417.9 -MPI Rank 1: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2584s; samplesPerSecond = 2476.9 -MPI Rank 1: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2513s; samplesPerSecond = 2546.6 -MPI Rank 1: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2509s; samplesPerSecond = 2550.5 -MPI Rank 1: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2514s; samplesPerSecond = 2546.1 -MPI Rank 1: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2542s; samplesPerSecond = 2517.8 -MPI Rank 1: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2542s; samplesPerSecond = 2517.8 -MPI Rank 1: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2548s; samplesPerSecond = 2511.4 -MPI Rank 1: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2502s; samplesPerSecond = 2558.4 -MPI Rank 1: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2521s; samplesPerSecond = 2538.3 -MPI Rank 1: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2861s; samplesPerSecond = 2237.3 -MPI Rank 1: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2576s; samplesPerSecond = 2484.3 -MPI Rank 1: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2539s; samplesPerSecond = 2520.4 -MPI Rank 1: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2546s; samplesPerSecond = 2513.7 -MPI Rank 1: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2671s; samplesPerSecond = 2396.0 -MPI Rank 1: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2600s; samplesPerSecond = 2461.4 -MPI Rank 1: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2583s; samplesPerSecond = 2478.2 -MPI Rank 1: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2632s; samplesPerSecond = 2431.4 -MPI Rank 1: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2588s; samplesPerSecond = 2473.3 -MPI Rank 1: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2669s; samplesPerSecond = 2398.2 -MPI Rank 1: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2586s; samplesPerSecond = 2474.4 -MPI Rank 1: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2501s; samplesPerSecond = 2559.0 -MPI Rank 1: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2513s; samplesPerSecond = 2547.1 -MPI Rank 1: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2536s; samplesPerSecond = 2523.9 -MPI Rank 1: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2534s; samplesPerSecond = 2525.9 -MPI Rank 1: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2532s; samplesPerSecond = 2527.3 -MPI Rank 1: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2518s; samplesPerSecond = 2541.6 -MPI Rank 1: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2511s; samplesPerSecond = 2548.7 -MPI Rank 1: 12/15/2016 08:31:58: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.28342s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:58: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:31:58: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.030247 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.017114 -MPI Rank 1: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19109241 * 2304; EvalClassificationError = 0.58246528 * 2304; time = 0.3927s; samplesPerSecond = 5866.6 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.028061 -MPI Rank 1: Async gradient aggregation wait time: 0.005708 -MPI Rank 1: Actual gradient aggregation time: 0.044276 -MPI Rank 1: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.20697464 * 2560; EvalClassificationError = 0.59453125 * 2560; time = 0.3941s; samplesPerSecond = 6495.0 -MPI Rank 1: Async gradient aggregation wait time: 0.009357 -MPI Rank 1: Actual gradient aggregation time: 0.039388 -MPI Rank 1: Async gradient aggregation wait time: 0.005254 -MPI Rank 1: Actual gradient aggregation time: 0.036219 -MPI Rank 1: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23618716 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.4050s; samplesPerSecond = 6321.0 -MPI Rank 1: Async gradient aggregation wait time: 0.002649 -MPI Rank 1: Actual gradient aggregation time: 0.043082 -MPI Rank 1: Async gradient aggregation wait time: 0.006427 -MPI Rank 1: Actual gradient aggregation time: 0.038851 -MPI Rank 1: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.21810382 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.4063s; samplesPerSecond = 6301.0 -MPI Rank 1: Async gradient aggregation wait time: 0.00754 -MPI Rank 1: Actual gradient aggregation time: 0.040924 -MPI Rank 1: Async gradient aggregation wait time: 0.008271 -MPI Rank 1: Actual gradient aggregation time: 0.040546 -MPI Rank 1: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17778205 * 2560; EvalClassificationError = 0.59414062 * 2560; time = 0.4008s; samplesPerSecond = 6387.6 -MPI Rank 1: Async gradient aggregation wait time: 0.000114 -MPI Rank 1: Actual gradient aggregation time: 0.038789 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.035122 -MPI Rank 1: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13452559 * 2560; EvalClassificationError = 0.57734375 * 2560; time = 0.3869s; samplesPerSecond = 6617.2 -MPI Rank 1: Async gradient aggregation wait time: 0.00517 -MPI Rank 1: Actual gradient aggregation time: 0.037537 -MPI Rank 1: Async gradient aggregation wait time: 0.003319 -MPI Rank 1: Actual gradient aggregation time: 0.037987 -MPI Rank 1: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.13087789 * 2560; EvalClassificationError = 0.57265625 * 2560; time = 0.3933s; samplesPerSecond = 6509.3 -MPI Rank 1: Async gradient aggregation wait time: 0.00473 -MPI Rank 1: Actual gradient aggregation time: 0.038653 -MPI Rank 1: Async gradient aggregation wait time: 7e-005 -MPI Rank 1: Actual gradient aggregation time: 0.034378 -MPI Rank 1: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.11200101 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.3829s; samplesPerSecond = 6686.4 -MPI Rank 1: Async gradient aggregation wait time: 0.032732 -MPI Rank 1: Actual gradient aggregation time: 0.012683 -MPI Rank 1: 12/15/2016 08:32:02: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17402050 * 20480; EvalClassificationError = 0.58750000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.21197s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:02: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 1e-006 -MPI Rank 1: Actual gradient aggregation time: 0.037301 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.082708 -MPI Rank 1: 12/15/2016 08:32:03: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.15723941 * 9216; EvalClassificationError = 0.56488715 * 9216; time = 1.2386s; samplesPerSecond = 7440.4 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.112156 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.075956 -MPI Rank 1: 12/15/2016 08:32:04: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02453665 * 10240; EvalClassificationError = 0.55771484 * 10240; time = 1.2680s; samplesPerSecond = 8075.8 -MPI Rank 1: 12/15/2016 08:32:04: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.08437881 * 20480; EvalClassificationError = 0.56079102 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.55944s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:04: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:04: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 4e-006 -MPI Rank 1: Actual gradient aggregation time: 0.023745 -MPI Rank 1: Async gradient aggregation wait time: 2e-006 -MPI Rank 1: Actual gradient aggregation time: 0.008013 -MPI Rank 1: 12/15/2016 08:32:06: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96502938 * 9216; EvalClassificationError = 0.53190104 * 9216; time = 1.2758s; samplesPerSecond = 7223.5 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.01483 -MPI Rank 1: Async gradient aggregation wait time: 3e-006 -MPI Rank 1: Actual gradient aggregation time: 0.028558 -MPI Rank 1: 12/15/2016 08:32:07: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95947098 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 1.2730s; samplesPerSecond = 8044.2 -MPI Rank 1: Async gradient aggregation wait time: 0.005241 -MPI Rank 1: 12/15/2016 08:32:07: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.96369079 * 20480; EvalClassificationError = 0.53471680 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.57039s -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:07: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 12/15/2016 08:32:07: __COMPLETED__ -MPI Rank 2: 12/15/2016 08:31:47: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr_speechTrain.logrank2 -MPI Rank 2: CNTK 2.0.beta6.0+ (HEAD 5f1fab, Dec 15 2016 06:29:34) on cntk-muc03 at 2016/12/15 08:31:45 -MPI Rank 2: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu DeviceId=-1 timestamping=true numCPUThreads=1 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20161215082748.614918\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_cpu/stderr -MPI Rank 2: 12/15/2016 08:31:47: Using 1 CPU threads. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: ############################################################################## -MPI Rank 2: 12/15/2016 08:31:47: # # -MPI Rank 2: 12/15/2016 08:31:47: # speechTrain command (train action) # -MPI Rank 2: 12/15/2016 08:31:47: # # -MPI Rank 2: 12/15/2016 08:31:47: ############################################################################## -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: -MPI Rank 2: Creating virgin network. -MPI Rank 2: SimpleNetworkBuilder Using CPU -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: 12/15/2016 08:31:47: -MPI Rank 2: Model has 25 nodes. Using CPU. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: Training criterion: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: 12/15/2016 08:31:47: Evaluation criterion: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:47: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 12/15/2016 08:31:47: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 12/15/2016 08:31:47: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 12/15/2016 08:31:47: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 12/15/2016 08:31:47: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: Initializing dataParallelSGD with FP64 aggregation. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:47: MeanOfFeatures = Mean() -MPI Rank 2: 12/15/2016 08:31:47: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 12/15/2016 08:31:47: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:50: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:50: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:50: Starting minibatch loop. -MPI Rank 2: 12/15/2016 08:31:50: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.59755198 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.2984s; samplesPerSecond = 2145.1 -MPI Rank 2: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.34610349 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.2685s; samplesPerSecond = 2383.9 -MPI Rank 2: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98222516 * 640; EvalClassificationError = 0.89062500 * 640; time = 0.2603s; samplesPerSecond = 2459.1 -MPI Rank 2: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.74152814 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.2523s; samplesPerSecond = 2536.2 -MPI Rank 2: 12/15/2016 08:31:51: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83818572 * 640; EvalClassificationError = 0.86718750 * 640; time = 0.2533s; samplesPerSecond = 2526.6 -MPI Rank 2: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71641238 * 640; EvalClassificationError = 0.87500000 * 640; time = 0.2614s; samplesPerSecond = 2448.8 -MPI Rank 2: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.41802791 * 640; EvalClassificationError = 0.79687500 * 640; time = 0.2527s; samplesPerSecond = 2532.5 -MPI Rank 2: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53832947 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2533s; samplesPerSecond = 2527.1 -MPI Rank 2: 12/15/2016 08:31:52: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.50628076 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.2543s; samplesPerSecond = 2516.4 -MPI Rank 2: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.41478252 * 640; EvalClassificationError = 0.80781250 * 640; time = 0.2539s; samplesPerSecond = 2521.0 -MPI Rank 2: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.51031210 * 640; EvalClassificationError = 0.82812500 * 640; time = 0.2545s; samplesPerSecond = 2514.5 -MPI Rank 2: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.28365485 * 640; EvalClassificationError = 0.79375000 * 640; time = 0.2522s; samplesPerSecond = 2537.9 -MPI Rank 2: 12/15/2016 08:31:53: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.20932117 * 640; EvalClassificationError = 0.79531250 * 640; time = 0.2487s; samplesPerSecond = 2573.4 -MPI Rank 2: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.07460534 * 640; EvalClassificationError = 0.75468750 * 640; time = 0.2520s; samplesPerSecond = 2539.7 -MPI Rank 2: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.97529104 * 640; EvalClassificationError = 0.72031250 * 640; time = 0.2805s; samplesPerSecond = 2281.6 -MPI Rank 2: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.11968882 * 640; EvalClassificationError = 0.74531250 * 640; time = 0.2580s; samplesPerSecond = 2480.2 -MPI Rank 2: 12/15/2016 08:31:54: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.84172140 * 640; EvalClassificationError = 0.71093750 * 640; time = 0.2642s; samplesPerSecond = 2422.4 -MPI Rank 2: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.74031745 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2820s; samplesPerSecond = 2269.5 -MPI Rank 2: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.83858085 * 640; EvalClassificationError = 0.72656250 * 640; time = 0.2554s; samplesPerSecond = 2506.1 -MPI Rank 2: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.74632253 * 640; EvalClassificationError = 0.69218750 * 640; time = 0.2599s; samplesPerSecond = 2462.9 -MPI Rank 2: 12/15/2016 08:31:55: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.61033254 * 640; EvalClassificationError = 0.66250000 * 640; time = 0.2579s; samplesPerSecond = 2481.4 -MPI Rank 2: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.61330754 * 640; EvalClassificationError = 0.65000000 * 640; time = 0.2629s; samplesPerSecond = 2434.6 -MPI Rank 2: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.54591810 * 640; EvalClassificationError = 0.66406250 * 640; time = 0.2602s; samplesPerSecond = 2460.1 -MPI Rank 2: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.57566512 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2617s; samplesPerSecond = 2445.7 -MPI Rank 2: 12/15/2016 08:31:56: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.49164945 * 640; EvalClassificationError = 0.63281250 * 640; time = 0.2518s; samplesPerSecond = 2541.9 -MPI Rank 2: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.39954796 * 640; EvalClassificationError = 0.62812500 * 640; time = 0.2529s; samplesPerSecond = 2530.4 -MPI Rank 2: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27034227 * 640; EvalClassificationError = 0.59375000 * 640; time = 0.2549s; samplesPerSecond = 2510.5 -MPI Rank 2: 12/15/2016 08:31:57: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.52112387 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.2542s; samplesPerSecond = 2517.4 -MPI Rank 2: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27800991 * 640; EvalClassificationError = 0.59062500 * 640; time = 0.2546s; samplesPerSecond = 2513.8 -MPI Rank 2: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26783634 * 640; EvalClassificationError = 0.61093750 * 640; time = 0.2511s; samplesPerSecond = 2548.9 -MPI Rank 2: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24590355 * 640; EvalClassificationError = 0.58593750 * 640; time = 0.2517s; samplesPerSecond = 2542.9 -MPI Rank 2: 12/15/2016 08:31:58: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.24415615 * 640; EvalClassificationError = 0.59843750 * 640; time = 0.2514s; samplesPerSecond = 2545.7 -MPI Rank 2: 12/15/2016 08:31:58: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.04696987 * 20480; EvalClassificationError = 0.73583984 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=8.29391s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:58: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:31:58: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.014339 -MPI Rank 2: Async gradient aggregation wait time: 0.02282 -MPI Rank 2: Actual gradient aggregation time: 0.034024 -MPI Rank 2: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19109241 * 2304; EvalClassificationError = 0.58246528 * 2304; time = 0.3624s; samplesPerSecond = 6357.5 -MPI Rank 2: Async gradient aggregation wait time: 0.011787 -MPI Rank 2: Actual gradient aggregation time: 0.044361 -MPI Rank 2: Async gradient aggregation wait time: 0.017445 -MPI Rank 2: Actual gradient aggregation time: 0.042474 -MPI Rank 2: 12/15/2016 08:31:59: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.20697464 * 2560; EvalClassificationError = 0.59453125 * 2560; time = 0.4107s; samplesPerSecond = 6233.9 -MPI Rank 2: Async gradient aggregation wait time: 0.016332 -MPI Rank 2: Actual gradient aggregation time: 0.037706 -MPI Rank 2: Async gradient aggregation wait time: 0.015016 -MPI Rank 2: Actual gradient aggregation time: 0.037001 -MPI Rank 2: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23618716 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.3980s; samplesPerSecond = 6431.9 -MPI Rank 2: Async gradient aggregation wait time: 0.018632 -MPI Rank 2: Actual gradient aggregation time: 0.044457 -MPI Rank 2: Async gradient aggregation wait time: 0.015279 -MPI Rank 2: Actual gradient aggregation time: 0.03699 -MPI Rank 2: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.21810382 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.4133s; samplesPerSecond = 6193.6 -MPI Rank 2: Async gradient aggregation wait time: 0.016877 -MPI Rank 2: Actual gradient aggregation time: 0.039 -MPI Rank 2: Async gradient aggregation wait time: 0.012806 -MPI Rank 2: Actual gradient aggregation time: 0.040168 -MPI Rank 2: 12/15/2016 08:32:00: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17778205 * 2560; EvalClassificationError = 0.59414062 * 2560; time = 0.3948s; samplesPerSecond = 6483.8 -MPI Rank 2: Async gradient aggregation wait time: 0.019087 -MPI Rank 2: Actual gradient aggregation time: 0.041333 -MPI Rank 2: Async gradient aggregation wait time: 0.001464 -MPI Rank 2: Actual gradient aggregation time: 0.0523 -MPI Rank 2: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13452559 * 2560; EvalClassificationError = 0.57734375 * 2560; time = 0.3891s; samplesPerSecond = 6579.9 -MPI Rank 2: Async gradient aggregation wait time: 0.013255 -MPI Rank 2: Actual gradient aggregation time: 0.035552 -MPI Rank 2: Async gradient aggregation wait time: 0.012247 -MPI Rank 2: Actual gradient aggregation time: 0.039475 -MPI Rank 2: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.13087789 * 2560; EvalClassificationError = 0.57265625 * 2560; time = 0.3970s; samplesPerSecond = 6448.7 -MPI Rank 2: Async gradient aggregation wait time: 0.01151 -MPI Rank 2: Actual gradient aggregation time: 0.038946 -MPI Rank 2: Async gradient aggregation wait time: 0.014233 -MPI Rank 2: Actual gradient aggregation time: 0.03598 -MPI Rank 2: 12/15/2016 08:32:01: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.11200101 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.3830s; samplesPerSecond = 6683.7 -MPI Rank 2: Async gradient aggregation wait time: 0.032617 -MPI Rank 2: Actual gradient aggregation time: 0.009137 -MPI Rank 2: 12/15/2016 08:32:02: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17402050 * 20480; EvalClassificationError = 0.58750000 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=3.19815s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:02: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.048113 -MPI Rank 2: Actual gradient aggregation time: 0.121149 -MPI Rank 2: Async gradient aggregation wait time: 0.06524 -MPI Rank 2: Actual gradient aggregation time: 0.122137 -MPI Rank 2: 12/15/2016 08:32:03: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.15723941 * 9216; EvalClassificationError = 0.56488715 * 9216; time = 1.1785s; samplesPerSecond = 7820.3 -MPI Rank 2: Async gradient aggregation wait time: 0.02704 -MPI Rank 2: Actual gradient aggregation time: 0.13324 -MPI Rank 2: Async gradient aggregation wait time: 0.041943 -MPI Rank 2: Actual gradient aggregation time: 0.120703 -MPI Rank 2: 12/15/2016 08:32:04: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02453665 * 10240; EvalClassificationError = 0.55771484 * 10240; time = 1.2475s; samplesPerSecond = 8208.4 -MPI Rank 2: 12/15/2016 08:32:04: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.08437881 * 20480; EvalClassificationError = 0.56079102 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=2.54358s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:04: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:04: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 3e-006 -MPI Rank 2: Actual gradient aggregation time: 0.070919 -MPI Rank 2: Async gradient aggregation wait time: 0.030892 -MPI Rank 2: Actual gradient aggregation time: 0.125888 -MPI Rank 2: 12/15/2016 08:32:05: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.96502938 * 9216; EvalClassificationError = 0.53190104 * 9216; time = 1.1196s; samplesPerSecond = 8231.4 -MPI Rank 2: Async gradient aggregation wait time: 0.034789 -MPI Rank 2: Actual gradient aggregation time: 0.121302 -MPI Rank 2: Async gradient aggregation wait time: 0.035583 -MPI Rank 2: Actual gradient aggregation time: 0.138767 -MPI Rank 2: 12/15/2016 08:32:07: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95947098 * 10240; EvalClassificationError = 0.53603516 * 10240; time = 1.2808s; samplesPerSecond = 7994.8 -MPI Rank 2: Async gradient aggregation wait time: 0.008184 -MPI Rank 2: 12/15/2016 08:32:07: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.96369079 * 20480; EvalClassificationError = 0.53471680 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=2.53683s -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:07: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 12/15/2016 08:32:07: __COMPLETED__ diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt deleted file mode 100644 index 9382da138..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt +++ /dev/null @@ -1,1933 +0,0 @@ -CPU info: - CPU Model Name: Intel(R) Xeon(R) CPU W3565 @ 3.20GHz - Hardware threads: 8 - Total Memory: 12580436 kB -------------------------------------------------------------------- -=== Running C:\Program Files\Microsoft MPI\Bin\/mpiexec.exe -n 3 C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 02:54:53 - Last modified date: Fri Aug 12 05:31:21 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: no - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool3 - Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 02:54:53 - Last modified date: Fri Aug 12 05:31:21 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: no - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool3 - Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -------------------------------------------------------------------- -Build info: - - Built time: Aug 16 2016 02:54:53 - Last modified date: Fri Aug 12 05:31:21 2016 - Build type: Release - Build target: GPU - With 1bit-SGD: no - Math lib: mkl - CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 - CUB_PATH: c:\src\cub-1.4.1 - CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda - Build Branch: HEAD - Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 - Built by svcphil on Philly-Pool3 - Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -------------------------------------------------------------------- -Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPIWrapper: initializing MPI -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: 3 nodes pinging each other -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -ping [requestnodes (before change)]: all 3 nodes responded -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (2) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (1) are in (participating) -requestnodes [MPIWrapper]: using 3 out of 3 MPI nodes (3 requested); we (0) are in (participating) -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: 3 nodes pinging each other -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -ping [requestnodes (after change)]: all 3 nodes responded -mpihelper: we are cog 2 in a gearbox of 3 -mpihelper: we are cog 1 in a gearbox of 3 -mpihelper: we are cog 0 in a gearbox of 3 -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: 3 nodes pinging each other -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -ping [mpihelper]: all 3 nodes responded -MPI Rank 0: 08/16/2016 03:04:10: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank0 -MPI Rank 0: 08/16/2016 03:04:10: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:04:10: Build info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: Built time: Aug 16 2016 02:54:53 -MPI Rank 0: 08/16/2016 03:04:10: Last modified date: Fri Aug 12 05:31:21 2016 -MPI Rank 0: 08/16/2016 03:04:10: Build type: Release -MPI Rank 0: 08/16/2016 03:04:10: Build target: GPU -MPI Rank 0: 08/16/2016 03:04:10: With 1bit-SGD: no -MPI Rank 0: 08/16/2016 03:04:10: Math lib: mkl -MPI Rank 0: 08/16/2016 03:04:10: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 0: 08/16/2016 03:04:10: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 0: 08/16/2016 03:04:10: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 0: 08/16/2016 03:04:10: Build Branch: HEAD -MPI Rank 0: 08/16/2016 03:04:10: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 0: 08/16/2016 03:04:10: Built by svcphil on Philly-Pool3 -MPI Rank 0: 08/16/2016 03:04:10: Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 0: 08/16/2016 03:04:10: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:04:10: ------------------------------------------------------------------- -MPI Rank 0: 08/16/2016 03:04:10: GPU info: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB -MPI Rank 0: 08/16/2016 03:04:10: ------------------------------------------------------------------- -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: Running on cntk-muc01 at 2016/08/16 03:04:10 -MPI Rank 0: 08/16/2016 03:04:10: Command line: -MPI Rank 0: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:04:10: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = $DeviceId$ -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "$DataDir$/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=2 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: 08/16/2016 03:04:10: precision = "float" -MPI Rank 0: command = speechTrain -MPI Rank 0: deviceId = 0 -MPI Rank 0: parallelTrain = true -MPI Rank 0: speechTrain = [ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: DeviceId=0 -MPI Rank 0: timestamping=true -MPI Rank 0: numCPUThreads=2 -MPI Rank 0: precision=double -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 0: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 0: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 0: configparameters: cntk.cntk:command=speechTrain -MPI Rank 0: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 0: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 0: configparameters: cntk.cntk:deviceId=0 -MPI Rank 0: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 0: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 0: configparameters: cntk.cntk:precision=double -MPI Rank 0: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 0: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 0: action = "train" -MPI Rank 0: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 0: deviceId = 0 -MPI Rank 0: traceLevel = 1 -MPI Rank 0: SimpleNetworkBuilder = [ -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 0: evalCriterion = "ClassificationError" -MPI Rank 0: layerTypes = "Sigmoid" -MPI Rank 0: initValueScale = 1.0 -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: uniformInit = true -MPI Rank 0: needPrior = true -MPI Rank 0: ] -MPI Rank 0: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 0: layerSizes = 363:512:512:132 -MPI Rank 0: trainingCriterion = 'CE' -MPI Rank 0: evalCriterion = 'Err' -MPI Rank 0: applyMeanVarNorm = true -MPI Rank 0: L = Length(layerSizes)-1 // number of model layers -MPI Rank 0: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 0: featNorm = if applyMeanVarNorm -MPI Rank 0: then MeanVarNorm(features) -MPI Rank 0: else features -MPI Rank 0: layers[layer:1..L-1] = if layer > 1 -MPI Rank 0: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 0: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 0: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 0: CE = if trainingCriterion == 'CE' -MPI Rank 0: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 0: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 0: Err = if evalCriterion == 'Err' then -MPI Rank 0: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 0: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 0: logPrior = LogPrior(labels) -MPI Rank 0: // TODO: how to add a tag to an infix operation? -MPI Rank 0: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 0: ] -MPI Rank 0: SGD = [ -MPI Rank 0: epochSize = 20480 -MPI Rank 0: minibatchSize = 64:256:1024 -MPI Rank 0: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 0: numMBsToShowResult = 10 -MPI Rank 0: momentumPerMB = 0.9:0.656119 -MPI Rank 0: dropoutRate = 0.0 -MPI Rank 0: maxEpochs = 3 -MPI Rank 0: keepCheckPointFiles = true -MPI Rank 0: clippingThresholdPerSample = 1#INF -MPI Rank 0: ParallelTrain = [ -MPI Rank 0: parallelizationMethod = "DataParallelSGD" -MPI Rank 0: distributedMBReading = true -MPI Rank 0: DataParallelSGD = [ -MPI Rank 0: gradientBits = 32 -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: AutoAdjust = [ -MPI Rank 0: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 0: loadBestModel = true -MPI Rank 0: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 0: learnRateDecreaseFactor = 0.5 -MPI Rank 0: learnRateIncreaseFactor = 1.382 -MPI Rank 0: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: reader = [ -MPI Rank 0: readerType = "HTKMLFReader" -MPI Rank 0: readMethod = "blockRandomize" -MPI Rank 0: miniBatchMode = "partial" -MPI Rank 0: randomize = "auto" -MPI Rank 0: verbosity = 0 -MPI Rank 0: useMersenneTwisterRand=true -MPI Rank 0: features = [ -MPI Rank 0: dim = 363 -MPI Rank 0: type = "real" -MPI Rank 0: scpFile = "glob_0000.scp" -MPI Rank 0: ] -MPI Rank 0: labels = [ -MPI Rank 0: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 0: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 0: labelDim = 132 -MPI Rank 0: labelType = "category" -MPI Rank 0: ] -MPI Rank 0: ] -MPI Rank 0: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 0: -MPI Rank 0: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 0: configparameters: cntk.cntk:timestamping=true -MPI Rank 0: 08/16/2016 03:04:10: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 0: 08/16/2016 03:04:10: Commands: speechTrain -MPI Rank 0: 08/16/2016 03:04:10: Precision = "double" -MPI Rank 0: 08/16/2016 03:04:10: Using 2 CPU threads. -MPI Rank 0: 08/16/2016 03:04:10: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 0: 08/16/2016 03:04:10: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 0: 08/16/2016 03:04:10: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: ############################################################################## -MPI Rank 0: 08/16/2016 03:04:10: # # -MPI Rank 0: 08/16/2016 03:04:10: # Action "train" # -MPI Rank 0: 08/16/2016 03:04:10: # # -MPI Rank 0: 08/16/2016 03:04:10: ############################################################################## -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:10: CNTKCommandTrainBegin: speechTrain -MPI Rank 0: SimpleNetworkBuilder Using GPU 0 -MPI Rank 0: reading script file glob_0000.scp ... 948 entries -MPI Rank 0: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 0: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 0: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 0: label set 0: 129 classes -MPI Rank 0: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Creating virgin network. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 0: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 0: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 0: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 0: -MPI Rank 0: Post-processing network... -MPI Rank 0: -MPI Rank 0: 7 roots: -MPI Rank 0: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 0: EvalClassificationError = ClassificationError() -MPI Rank 0: InvStdOfFeatures = InvStdDev() -MPI Rank 0: MeanOfFeatures = Mean() -MPI Rank 0: PosteriorProb = Softmax() -MPI Rank 0: Prior = Mean() -MPI Rank 0: ScaledLogLikelihood = Minus() -MPI Rank 0: -MPI Rank 0: Validating network. 25 nodes to process in pass 1. -MPI Rank 0: -MPI Rank 0: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 0: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 0: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 0: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 0: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 0: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 0: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 0: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 0: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 0: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 0: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 0: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 0: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 0: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 0: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 0: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 0: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 0: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 0: -MPI Rank 0: Validating network. 17 nodes to process in pass 2. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Validating network, final pass. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 0: -MPI Rank 0: Post-processing network complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Created model with 25 nodes on GPU 0. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Training criterion node(s): -MPI Rank 0: 08/16/2016 03:04:11: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Evaluation criterion node(s): -MPI Rank 0: 08/16/2016 03:04:11: EvalClassificationError = ClassificationError -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: Allocating matrices for forward and/or backward propagation. -MPI Rank 0: -MPI Rank 0: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 0: -MPI Rank 0: { HLast : [132 x 1 x *] -MPI Rank 0: W2 : [132 x 512] (gradient) } -MPI Rank 0: { B1 : [512 x 1] (gradient) -MPI Rank 0: H2 : [512 x 1 x *] (gradient) -MPI Rank 0: HLast : [132 x 1 x *] (gradient) } -MPI Rank 0: { W0 : [512 x 363] (gradient) -MPI Rank 0: W0*features+B0 : [512 x 1 x *] } -MPI Rank 0: { H1 : [512 x 1 x *] -MPI Rank 0: W0*features : [512 x *] (gradient) } -MPI Rank 0: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1 : [512 x 1 x *] } -MPI Rank 0: { W1 : [512 x 512] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 0: { H2 : [512 x 1 x *] -MPI Rank 0: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 0: { B0 : [512 x 1] (gradient) -MPI Rank 0: H1 : [512 x 1 x *] (gradient) -MPI Rank 0: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 0: W2*H1 : [132 x 1 x *] } -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:04:11: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 0: 08/16/2016 03:04:11: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 0: 08/16/2016 03:04:11: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 0: 08/16/2016 03:04:11: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 0: 08/16/2016 03:04:11: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: Precomputing --> 3 PreCompute nodes found. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:11: MeanOfFeatures = Mean() -MPI Rank 0: 08/16/2016 03:04:11: InvStdOfFeatures = InvStdDev() -MPI Rank 0: 08/16/2016 03:04:11: Prior = Mean() -MPI Rank 0: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:16: Precomputing --> Completed. -MPI Rank 0: -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:17: Starting minibatch loop. -MPI Rank 0: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1370s; samplesPerSecond = 4671.2 -MPI Rank 0: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1288s; samplesPerSecond = 4970.8 -MPI Rank 0: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1286s; samplesPerSecond = 4977.0 -MPI Rank 0: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1328s; samplesPerSecond = 4818.7 -MPI Rank 0: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.1292s; samplesPerSecond = 4954.6 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.1335s; samplesPerSecond = 4792.9 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.1596s; samplesPerSecond = 4010.3 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1423s; samplesPerSecond = 4497.6 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1336s; samplesPerSecond = 4792.2 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1292s; samplesPerSecond = 4954.4 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.1415s; samplesPerSecond = 4524.4 -MPI Rank 0: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1386s; samplesPerSecond = 4617.8 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.1315s; samplesPerSecond = 4868.0 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.1421s; samplesPerSecond = 4504.6 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.1362s; samplesPerSecond = 4700.6 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1499s; samplesPerSecond = 4268.4 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.1439s; samplesPerSecond = 4446.8 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.1460s; samplesPerSecond = 4384.8 -MPI Rank 0: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.1451s; samplesPerSecond = 4409.4 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.1248s; samplesPerSecond = 5130.1 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1448s; samplesPerSecond = 4419.0 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1340s; samplesPerSecond = 4777.9 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1181s; samplesPerSecond = 5416.9 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1340s; samplesPerSecond = 4777.5 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1544s; samplesPerSecond = 4144.8 -MPI Rank 0: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1423s; samplesPerSecond = 4498.4 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.1369s; samplesPerSecond = 4676.1 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1422s; samplesPerSecond = 4502.3 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.1424s; samplesPerSecond = 4495.8 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.1374s; samplesPerSecond = 4658.8 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.0777s; samplesPerSecond = 8235.1 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.0478s; samplesPerSecond = 13378.5 -MPI Rank 0: 08/16/2016 03:04:21: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.27772s -MPI Rank 0: 08/16/2016 03:04:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.1' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Actual gradient aggregation time: 0.01782 -MPI Rank 0: Async gradient aggregation wait time: 0.005297 -MPI Rank 0: Actual gradient aggregation time: 0.025182 -MPI Rank 0: 08/16/2016 03:04:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.18586881 * 2304; EvalClassificationError = 0.58029514 * 2304; time = 0.2416s; samplesPerSecond = 9535.6 -MPI Rank 0: Async gradient aggregation wait time: 0.013746 -MPI Rank 0: Actual gradient aggregation time: 0.02406 -MPI Rank 0: Async gradient aggregation wait time: 1e-006 -MPI Rank 0: Actual gradient aggregation time: 0.013781 -MPI Rank 0: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.21453123 * 2560; EvalClassificationError = 0.59101563 * 2560; time = 0.2414s; samplesPerSecond = 10606.2 -MPI Rank 0: Async gradient aggregation wait time: 0.005801 -MPI Rank 0: Actual gradient aggregation time: 0.023583 -MPI Rank 0: Async gradient aggregation wait time: 0.005938 -MPI Rank 0: Actual gradient aggregation time: 0.024675 -MPI Rank 0: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23428937 * 2560; EvalClassificationError = 0.59843750 * 2560; time = 0.2445s; samplesPerSecond = 10470.5 -MPI Rank 0: Async gradient aggregation wait time: 0.005494 -MPI Rank 0: Actual gradient aggregation time: 0.023604 -MPI Rank 0: Async gradient aggregation wait time: 0.010067 -MPI Rank 0: Actual gradient aggregation time: 0.023875 -MPI Rank 0: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.22238576 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.2414s; samplesPerSecond = 10605.9 -MPI Rank 0: Async gradient aggregation wait time: 0.009865 -MPI Rank 0: Actual gradient aggregation time: 0.02315 -MPI Rank 0: Async gradient aggregation wait time: 0.011288 -MPI Rank 0: Actual gradient aggregation time: 0.025755 -MPI Rank 0: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17945944 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.2418s; samplesPerSecond = 10585.4 -MPI Rank 0: Async gradient aggregation wait time: 0.002672 -MPI Rank 0: Actual gradient aggregation time: 0.023819 -MPI Rank 0: Async gradient aggregation wait time: 0.004923 -MPI Rank 0: Actual gradient aggregation time: 0.028438 -MPI Rank 0: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13880131 * 2560; EvalClassificationError = 0.58164063 * 2560; time = 0.2354s; samplesPerSecond = 10875.5 -MPI Rank 0: Async gradient aggregation wait time: 0.007363 -MPI Rank 0: Actual gradient aggregation time: 0.023261 -MPI Rank 0: Async gradient aggregation wait time: 0.00721 -MPI Rank 0: Actual gradient aggregation time: 0.023427 -MPI Rank 0: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12741612 * 2560; EvalClassificationError = 0.57031250 * 2560; time = 0.2462s; samplesPerSecond = 10398.9 -MPI Rank 0: Async gradient aggregation wait time: 0.007334 -MPI Rank 0: Actual gradient aggregation time: 0.024979 -MPI Rank 0: Async gradient aggregation wait time: 0.009245 -MPI Rank 0: Actual gradient aggregation time: 0.024328 -MPI Rank 0: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.09486743 * 2560; EvalClassificationError = 0.58242187 * 2560; time = 0.2420s; samplesPerSecond = 10580.1 -MPI Rank 0: Async gradient aggregation wait time: 0.008826 -MPI Rank 0: Actual gradient aggregation time: 0.01018 -MPI Rank 0: 08/16/2016 03:04:23: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17271296 * 20480; EvalClassificationError = 0.58520508 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.96059s -MPI Rank 0: 08/16/2016 03:04:23: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.2' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.003725 -MPI Rank 0: Actual gradient aggregation time: 0.069103 -MPI Rank 0: Async gradient aggregation wait time: 0.001861 -MPI Rank 0: Actual gradient aggregation time: 0.071875 -MPI Rank 0: 08/16/2016 03:04:24: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.17281503 * 9216; EvalClassificationError = 0.55924479 * 9216; time = 0.7209s; samplesPerSecond = 12784.6 -MPI Rank 0: Async gradient aggregation wait time: 0.00561 -MPI Rank 0: Actual gradient aggregation time: 0.06794 -MPI Rank 0: Async gradient aggregation wait time: 0.016713 -MPI Rank 0: Actual gradient aggregation time: 0.053532 -MPI Rank 0: 08/16/2016 03:04:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02446206 * 10240; EvalClassificationError = 0.55722656 * 10240; time = 0.6932s; samplesPerSecond = 14772.1 -MPI Rank 0: 08/16/2016 03:04:25: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09074709 * 20480; EvalClassificationError = 0.55820313 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.45977s -MPI Rank 0: 08/16/2016 03:04:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn.3' -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 0: Async gradient aggregation wait time: 0.033541 -MPI Rank 0: Actual gradient aggregation time: 0.07365 -MPI Rank 0: Async gradient aggregation wait time: 0.011228 -MPI Rank 0: Actual gradient aggregation time: 0.069676 -MPI Rank 0: 08/16/2016 03:04:25: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95451978 * 9216; EvalClassificationError = 0.52962240 * 9216; time = 0.7010s; samplesPerSecond = 13146.3 -MPI Rank 0: Async gradient aggregation wait time: 0.003101 -MPI Rank 0: Actual gradient aggregation time: 0.068768 -MPI Rank 0: Async gradient aggregation wait time: 0.007032 -MPI Rank 0: Actual gradient aggregation time: 0.068417 -MPI Rank 0: 08/16/2016 03:04:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95218466 * 10240; EvalClassificationError = 0.52802734 * 10240; time = 0.7013s; samplesPerSecond = 14602.1 -MPI Rank 0: Async gradient aggregation wait time: 0.009283 -MPI Rank 0: 08/16/2016 03:04:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.95485031 * 20480; EvalClassificationError = 0.52915039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=1.42792s -MPI Rank 0: 08/16/2016 03:04:26: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn' -MPI Rank 0: 08/16/2016 03:04:26: CNTKCommandTrainEnd: speechTrain -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:26: Action "train" complete. -MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:26: __COMPLETED__ -MPI Rank 0: ~MPIWrapper -MPI Rank 1: 08/16/2016 03:04:11: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank1 -MPI Rank 1: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:04:11: Build info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: Built time: Aug 16 2016 02:54:53 -MPI Rank 1: 08/16/2016 03:04:11: Last modified date: Fri Aug 12 05:31:21 2016 -MPI Rank 1: 08/16/2016 03:04:11: Build type: Release -MPI Rank 1: 08/16/2016 03:04:11: Build target: GPU -MPI Rank 1: 08/16/2016 03:04:11: With 1bit-SGD: no -MPI Rank 1: 08/16/2016 03:04:11: Math lib: mkl -MPI Rank 1: 08/16/2016 03:04:11: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 1: 08/16/2016 03:04:11: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 1: 08/16/2016 03:04:11: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 1: 08/16/2016 03:04:11: Build Branch: HEAD -MPI Rank 1: 08/16/2016 03:04:11: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 1: 08/16/2016 03:04:11: Built by svcphil on Philly-Pool3 -MPI Rank 1: 08/16/2016 03:04:11: Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 1: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 1: 08/16/2016 03:04:11: GPU info: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB -MPI Rank 1: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: Running on cntk-muc01 at 2016/08/16 03:04:11 -MPI Rank 1: 08/16/2016 03:04:11: Command line: -MPI Rank 1: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:04:11: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = $DeviceId$ -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "$DataDir$/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=2 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: 08/16/2016 03:04:11: precision = "float" -MPI Rank 1: command = speechTrain -MPI Rank 1: deviceId = 0 -MPI Rank 1: parallelTrain = true -MPI Rank 1: speechTrain = [ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: DeviceId=0 -MPI Rank 1: timestamping=true -MPI Rank 1: numCPUThreads=2 -MPI Rank 1: precision=double -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 1: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 1: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 1: configparameters: cntk.cntk:command=speechTrain -MPI Rank 1: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 1: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 1: configparameters: cntk.cntk:deviceId=0 -MPI Rank 1: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 1: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 1: configparameters: cntk.cntk:precision=double -MPI Rank 1: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 1: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 1: action = "train" -MPI Rank 1: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 1: deviceId = 0 -MPI Rank 1: traceLevel = 1 -MPI Rank 1: SimpleNetworkBuilder = [ -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 1: evalCriterion = "ClassificationError" -MPI Rank 1: layerTypes = "Sigmoid" -MPI Rank 1: initValueScale = 1.0 -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: uniformInit = true -MPI Rank 1: needPrior = true -MPI Rank 1: ] -MPI Rank 1: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 1: layerSizes = 363:512:512:132 -MPI Rank 1: trainingCriterion = 'CE' -MPI Rank 1: evalCriterion = 'Err' -MPI Rank 1: applyMeanVarNorm = true -MPI Rank 1: L = Length(layerSizes)-1 // number of model layers -MPI Rank 1: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 1: featNorm = if applyMeanVarNorm -MPI Rank 1: then MeanVarNorm(features) -MPI Rank 1: else features -MPI Rank 1: layers[layer:1..L-1] = if layer > 1 -MPI Rank 1: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 1: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 1: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 1: CE = if trainingCriterion == 'CE' -MPI Rank 1: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 1: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 1: Err = if evalCriterion == 'Err' then -MPI Rank 1: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 1: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 1: logPrior = LogPrior(labels) -MPI Rank 1: // TODO: how to add a tag to an infix operation? -MPI Rank 1: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 1: ] -MPI Rank 1: SGD = [ -MPI Rank 1: epochSize = 20480 -MPI Rank 1: minibatchSize = 64:256:1024 -MPI Rank 1: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 1: numMBsToShowResult = 10 -MPI Rank 1: momentumPerMB = 0.9:0.656119 -MPI Rank 1: dropoutRate = 0.0 -MPI Rank 1: maxEpochs = 3 -MPI Rank 1: keepCheckPointFiles = true -MPI Rank 1: clippingThresholdPerSample = 1#INF -MPI Rank 1: ParallelTrain = [ -MPI Rank 1: parallelizationMethod = "DataParallelSGD" -MPI Rank 1: distributedMBReading = true -MPI Rank 1: DataParallelSGD = [ -MPI Rank 1: gradientBits = 32 -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: AutoAdjust = [ -MPI Rank 1: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 1: loadBestModel = true -MPI Rank 1: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 1: learnRateDecreaseFactor = 0.5 -MPI Rank 1: learnRateIncreaseFactor = 1.382 -MPI Rank 1: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: reader = [ -MPI Rank 1: readerType = "HTKMLFReader" -MPI Rank 1: readMethod = "blockRandomize" -MPI Rank 1: miniBatchMode = "partial" -MPI Rank 1: randomize = "auto" -MPI Rank 1: verbosity = 0 -MPI Rank 1: useMersenneTwisterRand=true -MPI Rank 1: features = [ -MPI Rank 1: dim = 363 -MPI Rank 1: type = "real" -MPI Rank 1: scpFile = "glob_0000.scp" -MPI Rank 1: ] -MPI Rank 1: labels = [ -MPI Rank 1: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 1: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 1: labelDim = 132 -MPI Rank 1: labelType = "category" -MPI Rank 1: ] -MPI Rank 1: ] -MPI Rank 1: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 1: -MPI Rank 1: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 1: configparameters: cntk.cntk:timestamping=true -MPI Rank 1: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 1: 08/16/2016 03:04:11: Commands: speechTrain -MPI Rank 1: 08/16/2016 03:04:11: Precision = "double" -MPI Rank 1: 08/16/2016 03:04:11: Using 2 CPU threads. -MPI Rank 1: 08/16/2016 03:04:11: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 1: 08/16/2016 03:04:11: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 1: 08/16/2016 03:04:11: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: ############################################################################## -MPI Rank 1: 08/16/2016 03:04:11: # # -MPI Rank 1: 08/16/2016 03:04:11: # Action "train" # -MPI Rank 1: 08/16/2016 03:04:11: # # -MPI Rank 1: 08/16/2016 03:04:11: ############################################################################## -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: CNTKCommandTrainBegin: speechTrain -MPI Rank 1: SimpleNetworkBuilder Using GPU 0 -MPI Rank 1: reading script file glob_0000.scp ... 948 entries -MPI Rank 1: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 1: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 1: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 1: label set 0: 129 classes -MPI Rank 1: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:11: Creating virgin network. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 1: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 1: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 1: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 1: -MPI Rank 1: Post-processing network... -MPI Rank 1: -MPI Rank 1: 7 roots: -MPI Rank 1: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 1: EvalClassificationError = ClassificationError() -MPI Rank 1: InvStdOfFeatures = InvStdDev() -MPI Rank 1: MeanOfFeatures = Mean() -MPI Rank 1: PosteriorProb = Softmax() -MPI Rank 1: Prior = Mean() -MPI Rank 1: ScaledLogLikelihood = Minus() -MPI Rank 1: -MPI Rank 1: Validating network. 25 nodes to process in pass 1. -MPI Rank 1: -MPI Rank 1: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 1: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 1: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 1: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 1: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 1: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 1: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 1: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 1: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 1: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 1: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 1: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 1: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 1: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 1: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 1: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 1: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 1: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 1: -MPI Rank 1: Validating network. 17 nodes to process in pass 2. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Validating network, final pass. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 1: -MPI Rank 1: Post-processing network complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Created model with 25 nodes on GPU 0. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Training criterion node(s): -MPI Rank 1: 08/16/2016 03:04:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Evaluation criterion node(s): -MPI Rank 1: 08/16/2016 03:04:12: EvalClassificationError = ClassificationError -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: Allocating matrices for forward and/or backward propagation. -MPI Rank 1: -MPI Rank 1: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 1: -MPI Rank 1: { B1 : [512 x 1] (gradient) -MPI Rank 1: H2 : [512 x 1 x *] (gradient) -MPI Rank 1: HLast : [132 x 1 x *] (gradient) } -MPI Rank 1: { W0 : [512 x 363] (gradient) -MPI Rank 1: W0*features+B0 : [512 x 1 x *] } -MPI Rank 1: { HLast : [132 x 1 x *] -MPI Rank 1: W2 : [132 x 512] (gradient) } -MPI Rank 1: { H2 : [512 x 1 x *] -MPI Rank 1: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 1: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1 : [512 x 1 x *] } -MPI Rank 1: { H1 : [512 x 1 x *] -MPI Rank 1: W0*features : [512 x *] (gradient) } -MPI Rank 1: { B0 : [512 x 1] (gradient) -MPI Rank 1: H1 : [512 x 1 x *] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 1: W2*H1 : [132 x 1 x *] } -MPI Rank 1: { W1 : [512 x 512] (gradient) -MPI Rank 1: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:04:12: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 1: 08/16/2016 03:04:12: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 1: 08/16/2016 03:04:12: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 1: 08/16/2016 03:04:12: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 1: 08/16/2016 03:04:12: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: Precomputing --> 3 PreCompute nodes found. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:12: MeanOfFeatures = Mean() -MPI Rank 1: 08/16/2016 03:04:12: InvStdOfFeatures = InvStdDev() -MPI Rank 1: 08/16/2016 03:04:12: Prior = Mean() -MPI Rank 1: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:17: Precomputing --> Completed. -MPI Rank 1: -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:17: Starting minibatch loop. -MPI Rank 1: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1383s; samplesPerSecond = 4626.9 -MPI Rank 1: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1277s; samplesPerSecond = 5012.4 -MPI Rank 1: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1322s; samplesPerSecond = 4842.3 -MPI Rank 1: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1281s; samplesPerSecond = 4996.8 -MPI Rank 1: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.1287s; samplesPerSecond = 4972.5 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.1302s; samplesPerSecond = 4915.3 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.1346s; samplesPerSecond = 4754.3 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1114s; samplesPerSecond = 5744.4 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1217s; samplesPerSecond = 5259.0 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1293s; samplesPerSecond = 4950.4 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.1262s; samplesPerSecond = 5071.9 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1379s; samplesPerSecond = 4641.1 -MPI Rank 1: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.1298s; samplesPerSecond = 4928.9 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.1210s; samplesPerSecond = 5289.1 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.1194s; samplesPerSecond = 5361.6 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1416s; samplesPerSecond = 4521.2 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.1382s; samplesPerSecond = 4632.6 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.1413s; samplesPerSecond = 4528.9 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.1322s; samplesPerSecond = 4841.9 -MPI Rank 1: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.1376s; samplesPerSecond = 4649.5 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1460s; samplesPerSecond = 4382.5 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1281s; samplesPerSecond = 4995.8 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1469s; samplesPerSecond = 4356.5 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1340s; samplesPerSecond = 4775.4 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1223s; samplesPerSecond = 5232.3 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1326s; samplesPerSecond = 4825.6 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.1388s; samplesPerSecond = 4611.2 -MPI Rank 1: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1185s; samplesPerSecond = 5402.5 -MPI Rank 1: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.1241s; samplesPerSecond = 5156.4 -MPI Rank 1: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.1334s; samplesPerSecond = 4799.2 -MPI Rank 1: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.1367s; samplesPerSecond = 4680.3 -MPI Rank 1: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.1250s; samplesPerSecond = 5120.9 -MPI Rank 1: 08/16/2016 03:04:21: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.20569s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Actual gradient aggregation time: 0.020729 -MPI Rank 1: Async gradient aggregation wait time: 0.009212 -MPI Rank 1: Actual gradient aggregation time: 0.025214 -MPI Rank 1: 08/16/2016 03:04:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.18586881 * 2304; EvalClassificationError = 0.58029514 * 2304; time = 0.2425s; samplesPerSecond = 9499.3 -MPI Rank 1: Async gradient aggregation wait time: 0.007698 -MPI Rank 1: Actual gradient aggregation time: 0.026004 -MPI Rank 1: Async gradient aggregation wait time: 0.004755 -MPI Rank 1: Actual gradient aggregation time: 0.027675 -MPI Rank 1: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.21453123 * 2560; EvalClassificationError = 0.59101563 * 2560; time = 0.2392s; samplesPerSecond = 10701.1 -MPI Rank 1: Async gradient aggregation wait time: 0.011416 -MPI Rank 1: Actual gradient aggregation time: 0.02323 -MPI Rank 1: Async gradient aggregation wait time: 0.006917 -MPI Rank 1: Actual gradient aggregation time: 0.022934 -MPI Rank 1: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23428937 * 2560; EvalClassificationError = 0.59843750 * 2560; time = 0.2449s; samplesPerSecond = 10453.7 -MPI Rank 1: Async gradient aggregation wait time: 0.004566 -MPI Rank 1: Actual gradient aggregation time: 0.023535 -MPI Rank 1: Async gradient aggregation wait time: 0.005357 -MPI Rank 1: Actual gradient aggregation time: 0.0244 -MPI Rank 1: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.22238576 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.2437s; samplesPerSecond = 10505.2 -MPI Rank 1: Async gradient aggregation wait time: 0.005284 -MPI Rank 1: Actual gradient aggregation time: 0.023907 -MPI Rank 1: Async gradient aggregation wait time: 0.006948 -MPI Rank 1: Actual gradient aggregation time: 0.023374 -MPI Rank 1: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17945944 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.2395s; samplesPerSecond = 10690.0 -MPI Rank 1: Async gradient aggregation wait time: 0.009502 -MPI Rank 1: Actual gradient aggregation time: 0.024024 -MPI Rank 1: Async gradient aggregation wait time: 0.004109 -MPI Rank 1: Actual gradient aggregation time: 0.028062 -MPI Rank 1: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13880131 * 2560; EvalClassificationError = 0.58164063 * 2560; time = 0.2342s; samplesPerSecond = 10930.4 -MPI Rank 1: Async gradient aggregation wait time: 0.006357 -MPI Rank 1: Actual gradient aggregation time: 0.023074 -MPI Rank 1: Async gradient aggregation wait time: 0.006199 -MPI Rank 1: Actual gradient aggregation time: 0.023349 -MPI Rank 1: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12741612 * 2560; EvalClassificationError = 0.57031250 * 2560; time = 0.2466s; samplesPerSecond = 10379.1 -MPI Rank 1: Async gradient aggregation wait time: 0.006286 -MPI Rank 1: Actual gradient aggregation time: 0.024889 -MPI Rank 1: Async gradient aggregation wait time: 0.004473 -MPI Rank 1: Actual gradient aggregation time: 0.023886 -MPI Rank 1: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.09486743 * 2560; EvalClassificationError = 0.58242187 * 2560; time = 0.2423s; samplesPerSecond = 10563.5 -MPI Rank 1: Async gradient aggregation wait time: 0.00911 -MPI Rank 1: Actual gradient aggregation time: 0.009992 -MPI Rank 1: 08/16/2016 03:04:23: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17271296 * 20480; EvalClassificationError = 0.58520508 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.9599s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.061455 -MPI Rank 1: Actual gradient aggregation time: 0.070776 -MPI Rank 1: Async gradient aggregation wait time: 0.04993 -MPI Rank 1: Actual gradient aggregation time: 0.071555 -MPI Rank 1: 08/16/2016 03:04:24: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.17281503 * 9216; EvalClassificationError = 0.55924479 * 9216; time = 0.7198s; samplesPerSecond = 12803.0 -MPI Rank 1: Async gradient aggregation wait time: 0.004585 -MPI Rank 1: Actual gradient aggregation time: 0.069929 -MPI Rank 1: Async gradient aggregation wait time: 0.002329 -MPI Rank 1: Actual gradient aggregation time: 0.053394 -MPI Rank 1: 08/16/2016 03:04:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02446206 * 10240; EvalClassificationError = 0.55722656 * 10240; time = 0.7150s; samplesPerSecond = 14321.6 -MPI Rank 1: 08/16/2016 03:04:25: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09074709 * 20480; EvalClassificationError = 0.55820313 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.46021s -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 1: Async gradient aggregation wait time: 0.006886 -MPI Rank 1: Actual gradient aggregation time: 0.071953 -MPI Rank 1: Async gradient aggregation wait time: 0.012085 -MPI Rank 1: Actual gradient aggregation time: 0.069686 -MPI Rank 1: 08/16/2016 03:04:25: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95451978 * 9216; EvalClassificationError = 0.52962240 * 9216; time = 0.7025s; samplesPerSecond = 13118.8 -MPI Rank 1: Async gradient aggregation wait time: 0.002115 -MPI Rank 1: Actual gradient aggregation time: 0.07045 -MPI Rank 1: Async gradient aggregation wait time: 0.006221 -MPI Rank 1: Actual gradient aggregation time: 0.068297 -MPI Rank 1: 08/16/2016 03:04:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95218466 * 10240; EvalClassificationError = 0.52802734 * 10240; time = 0.7012s; samplesPerSecond = 14604.2 -MPI Rank 1: Async gradient aggregation wait time: 0.009506 -MPI Rank 1: 08/16/2016 03:04:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.95485031 * 20480; EvalClassificationError = 0.52915039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=1.42827s -MPI Rank 1: 08/16/2016 03:04:26: CNTKCommandTrainEnd: speechTrain -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:26: Action "train" complete. -MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:26: __COMPLETED__ -MPI Rank 1: ~MPIWrapper -MPI Rank 2: 08/16/2016 03:04:11: Redirecting stderr to file C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr_speechTrain.logrank2 -MPI Rank 2: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:04:11: Build info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: Built time: Aug 16 2016 02:54:53 -MPI Rank 2: 08/16/2016 03:04:11: Last modified date: Fri Aug 12 05:31:21 2016 -MPI Rank 2: 08/16/2016 03:04:11: Build type: Release -MPI Rank 2: 08/16/2016 03:04:11: Build target: GPU -MPI Rank 2: 08/16/2016 03:04:11: With 1bit-SGD: no -MPI Rank 2: 08/16/2016 03:04:11: Math lib: mkl -MPI Rank 2: 08/16/2016 03:04:11: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 -MPI Rank 2: 08/16/2016 03:04:11: CUB_PATH: c:\src\cub-1.4.1 -MPI Rank 2: 08/16/2016 03:04:11: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda -MPI Rank 2: 08/16/2016 03:04:11: Build Branch: HEAD -MPI Rank 2: 08/16/2016 03:04:11: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84 -MPI Rank 2: 08/16/2016 03:04:11: Built by svcphil on Philly-Pool3 -MPI Rank 2: 08/16/2016 03:04:11: Build Path: c:\Jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ -MPI Rank 2: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 2: 08/16/2016 03:04:11: GPU info: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: Device[0]: cores = 2496; computeCapability = 5.2; type = "Quadro M4000"; memory = 8192 MB -MPI Rank 2: 08/16/2016 03:04:11: ------------------------------------------------------------------- -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: Running on cntk-muc01 at 2016/08/16 03:04:11 -MPI Rank 2: 08/16/2016 03:04:11: Command line: -MPI Rank 2: C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN/cntk.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu DeviceId=0 timestamping=true numCPUThreads=2 precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:04:11: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "$RunDir$/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = $DeviceId$ -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "$DataDir$/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "$DataDir$/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=2 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: 08/16/2016 03:04:11: precision = "float" -MPI Rank 2: command = speechTrain -MPI Rank 2: deviceId = 0 -MPI Rank 2: parallelTrain = true -MPI Rank 2: speechTrain = [ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: DeviceId=0 -MPI Rank 2: timestamping=true -MPI Rank 2: numCPUThreads=2 -MPI Rank 2: precision=double -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] -MPI Rank 2: speechTrain=[SGD=[maxEpochs=4]] -MPI Rank 2: speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> -MPI Rank 2: configparameters: cntk.cntk:command=speechTrain -MPI Rank 2: configparameters: cntk.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\DNN -MPI Rank 2: configparameters: cntk.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data -MPI Rank 2: configparameters: cntk.cntk:deviceId=0 -MPI Rank 2: configparameters: cntk.cntk:numCPUThreads=2 -MPI Rank 2: configparameters: cntk.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:parallelTrain=true -MPI Rank 2: configparameters: cntk.cntk:precision=double -MPI Rank 2: configparameters: cntk.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu -MPI Rank 2: configparameters: cntk.cntk:speechTrain=[ -MPI Rank 2: action = "train" -MPI Rank 2: modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn" -MPI Rank 2: deviceId = 0 -MPI Rank 2: traceLevel = 1 -MPI Rank 2: SimpleNetworkBuilder = [ -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = "CrossEntropyWithSoftmax" -MPI Rank 2: evalCriterion = "ClassificationError" -MPI Rank 2: layerTypes = "Sigmoid" -MPI Rank 2: initValueScale = 1.0 -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: uniformInit = true -MPI Rank 2: needPrior = true -MPI Rank 2: ] -MPI Rank 2: ExperimentalNetworkBuilder = [ // the same as above but with BS. Not active; activate by commenting out the SimpleNetworkBuilder entry above -MPI Rank 2: layerSizes = 363:512:512:132 -MPI Rank 2: trainingCriterion = 'CE' -MPI Rank 2: evalCriterion = 'Err' -MPI Rank 2: applyMeanVarNorm = true -MPI Rank 2: L = Length(layerSizes)-1 // number of model layers -MPI Rank 2: features = Input(layerSizes[0], 1, tag='feature') ; labels = Input(layerSizes[Length(layerSizes)-1], 1, tag='label') -MPI Rank 2: featNorm = if applyMeanVarNorm -MPI Rank 2: then MeanVarNorm(features) -MPI Rank 2: else features -MPI Rank 2: layers[layer:1..L-1] = if layer > 1 -MPI Rank 2: then SBFF(layers[layer-1].Eh, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: else SBFF(featNorm, layerSizes[layer], layerSizes[layer-1]) -MPI Rank 2: outLayer = BFF(layers[L-1].Eh, layerSizes[L], layerSizes[L-1]) -MPI Rank 2: outZ = outLayer.z // + PastValue(layerSizes[L], 1, outLayer.z) -MPI Rank 2: CE = if trainingCriterion == 'CE' -MPI Rank 2: then CrossEntropyWithSoftmax(labels, outZ, tag='criterion') -MPI Rank 2: else Fail('unknown trainingCriterion ' + trainingCriterion) -MPI Rank 2: Err = if evalCriterion == 'Err' then -MPI Rank 2: ClassificationError(labels, outZ, tag='evaluation') -MPI Rank 2: else Fail('unknown evalCriterion ' + evalCriterion) -MPI Rank 2: logPrior = LogPrior(labels) -MPI Rank 2: // TODO: how to add a tag to an infix operation? -MPI Rank 2: ScaledLogLikelihood = Minus (outZ, logPrior, tag='output') -MPI Rank 2: ] -MPI Rank 2: SGD = [ -MPI Rank 2: epochSize = 20480 -MPI Rank 2: minibatchSize = 64:256:1024 -MPI Rank 2: learningRatesPerMB = 1.0:0.5:0.1 -MPI Rank 2: numMBsToShowResult = 10 -MPI Rank 2: momentumPerMB = 0.9:0.656119 -MPI Rank 2: dropoutRate = 0.0 -MPI Rank 2: maxEpochs = 3 -MPI Rank 2: keepCheckPointFiles = true -MPI Rank 2: clippingThresholdPerSample = 1#INF -MPI Rank 2: ParallelTrain = [ -MPI Rank 2: parallelizationMethod = "DataParallelSGD" -MPI Rank 2: distributedMBReading = true -MPI Rank 2: DataParallelSGD = [ -MPI Rank 2: gradientBits = 32 -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: AutoAdjust = [ -MPI Rank 2: reduceLearnRateIfImproveLessThan = 0 -MPI Rank 2: loadBestModel = true -MPI Rank 2: increaseLearnRateIfImproveMoreThan = 1000000000 -MPI Rank 2: learnRateDecreaseFactor = 0.5 -MPI Rank 2: learnRateIncreaseFactor = 1.382 -MPI Rank 2: autoAdjustLR = "adjustAfterEpoch" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: reader = [ -MPI Rank 2: readerType = "HTKMLFReader" -MPI Rank 2: readMethod = "blockRandomize" -MPI Rank 2: miniBatchMode = "partial" -MPI Rank 2: randomize = "auto" -MPI Rank 2: verbosity = 0 -MPI Rank 2: useMersenneTwisterRand=true -MPI Rank 2: features = [ -MPI Rank 2: dim = 363 -MPI Rank 2: type = "real" -MPI Rank 2: scpFile = "glob_0000.scp" -MPI Rank 2: ] -MPI Rank 2: labels = [ -MPI Rank 2: mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf" -MPI Rank 2: labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list" -MPI Rank 2: labelDim = 132 -MPI Rank 2: labelType = "category" -MPI Rank 2: ] -MPI Rank 2: ] -MPI Rank 2: ] [SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] [SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] [SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] [SGD=[maxEpochs=4]] [SGD=[ParallelTrain=[syncPerfStats=5]]] -MPI Rank 2: -MPI Rank 2: configparameters: cntk.cntk:stderr=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/stderr -MPI Rank 2: configparameters: cntk.cntk:timestamping=true -MPI Rank 2: 08/16/2016 03:04:11: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< -MPI Rank 2: 08/16/2016 03:04:11: Commands: speechTrain -MPI Rank 2: 08/16/2016 03:04:11: Precision = "double" -MPI Rank 2: 08/16/2016 03:04:11: Using 2 CPU threads. -MPI Rank 2: 08/16/2016 03:04:11: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816030157.855216\Speech\DNN_ParallelNoQuantizationBufferedAsyncGradientAggregation@release_gpu/models/cntkSpeech.dnn -MPI Rank 2: 08/16/2016 03:04:11: CNTKCommandTrainInfo: speechTrain : 4 -MPI Rank 2: 08/16/2016 03:04:11: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 4 -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: ############################################################################## -MPI Rank 2: 08/16/2016 03:04:11: # # -MPI Rank 2: 08/16/2016 03:04:11: # Action "train" # -MPI Rank 2: 08/16/2016 03:04:11: # # -MPI Rank 2: 08/16/2016 03:04:11: ############################################################################## -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:11: CNTKCommandTrainBegin: speechTrain -MPI Rank 2: SimpleNetworkBuilder Using GPU 0 -MPI Rank 2: reading script file glob_0000.scp ... 948 entries -MPI Rank 2: total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/state.list -MPI Rank 2: htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Tests\EndToEndTests\Speech\Data/glob_0000.mlf ... total 948 entries -MPI Rank 2: ...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances -MPI Rank 2: label set 0: 129 classes -MPI Rank 2: minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Creating virgin network. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000. -MPI Rank 2: Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Microsoft::MSR::CNTK::GPUMatrix::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==8 -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000. -MPI Rank 2: Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000. -MPI Rank 2: Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false). -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000. -MPI Rank 2: -MPI Rank 2: Post-processing network... -MPI Rank 2: -MPI Rank 2: 7 roots: -MPI Rank 2: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() -MPI Rank 2: EvalClassificationError = ClassificationError() -MPI Rank 2: InvStdOfFeatures = InvStdDev() -MPI Rank 2: MeanOfFeatures = Mean() -MPI Rank 2: PosteriorProb = Softmax() -MPI Rank 2: Prior = Mean() -MPI Rank 2: ScaledLogLikelihood = Minus() -MPI Rank 2: -MPI Rank 2: Validating network. 25 nodes to process in pass 1. -MPI Rank 2: -MPI Rank 2: Validating --> labels = InputValue() : -> [132 x *] -MPI Rank 2: Validating --> W2 = LearnableParameter() : -> [132 x 512] -MPI Rank 2: Validating --> W1 = LearnableParameter() : -> [512 x 512] -MPI Rank 2: Validating --> W0 = LearnableParameter() : -> [512 x 363] -MPI Rank 2: Validating --> features = InputValue() : -> [363 x *] -MPI Rank 2: Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363] -MPI Rank 2: Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *] -MPI Rank 2: Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *] -MPI Rank 2: Validating --> B0 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> B1 = LearnableParameter() : -> [512 x 1] -MPI Rank 2: Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *] -MPI Rank 2: Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *] -MPI Rank 2: Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> B2 = LearnableParameter() : -> [132 x 1] -MPI Rank 2: Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *] -MPI Rank 2: Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1] -MPI Rank 2: Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *] -MPI Rank 2: Validating --> Prior = Mean (labels) : [132 x *] -> [132] -MPI Rank 2: Validating --> LogOfPrior = Log (Prior) : [132] -> [132] -MPI Rank 2: Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *] -MPI Rank 2: -MPI Rank 2: Validating network. 17 nodes to process in pass 2. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Validating network, final pass. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 12 out of 25 nodes do not share the minibatch layout with the input data. -MPI Rank 2: -MPI Rank 2: Post-processing network complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Created model with 25 nodes on GPU 0. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Training criterion node(s): -MPI Rank 2: 08/16/2016 03:04:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Evaluation criterion node(s): -MPI Rank 2: 08/16/2016 03:04:12: EvalClassificationError = ClassificationError -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: Allocating matrices for forward and/or backward propagation. -MPI Rank 2: -MPI Rank 2: Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared. -MPI Rank 2: -MPI Rank 2: { W0 : [512 x 363] (gradient) -MPI Rank 2: W0*features+B0 : [512 x 1 x *] } -MPI Rank 2: { H1 : [512 x 1 x *] -MPI Rank 2: W0*features : [512 x *] (gradient) } -MPI Rank 2: { W0*features+B0 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1 : [512 x 1 x *] } -MPI Rank 2: { B0 : [512 x 1] (gradient) -MPI Rank 2: H1 : [512 x 1 x *] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] (gradient) -MPI Rank 2: W2*H1 : [132 x 1 x *] } -MPI Rank 2: { B1 : [512 x 1] (gradient) -MPI Rank 2: H2 : [512 x 1 x *] (gradient) -MPI Rank 2: HLast : [132 x 1 x *] (gradient) } -MPI Rank 2: { W1 : [512 x 512] (gradient) -MPI Rank 2: W1*H1+B1 : [512 x 1 x *] } -MPI Rank 2: { HLast : [132 x 1 x *] -MPI Rank 2: W2 : [132 x 512] (gradient) } -MPI Rank 2: { H2 : [512 x 1 x *] -MPI Rank 2: W1*H1 : [512 x 1 x *] (gradient) } -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Node 'B0' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:04:12: Node 'B1' (LearnableParameter operation) : [512 x 1] -MPI Rank 2: 08/16/2016 03:04:12: Node 'B2' (LearnableParameter operation) : [132 x 1] -MPI Rank 2: 08/16/2016 03:04:12: Node 'W0' (LearnableParameter operation) : [512 x 363] -MPI Rank 2: 08/16/2016 03:04:12: Node 'W1' (LearnableParameter operation) : [512 x 512] -MPI Rank 2: 08/16/2016 03:04:12: Node 'W2' (LearnableParameter operation) : [132 x 512] -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: Precomputing --> 3 PreCompute nodes found. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:12: MeanOfFeatures = Mean() -MPI Rank 2: 08/16/2016 03:04:12: InvStdOfFeatures = InvStdDev() -MPI Rank 2: 08/16/2016 03:04:12: Prior = Mean() -MPI Rank 2: minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:17: Precomputing --> Completed. -MPI Rank 2: -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples -MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:17: Starting minibatch loop. -MPI Rank 2: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1376s; samplesPerSecond = 4650.0 -MPI Rank 2: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1334s; samplesPerSecond = 4797.3 -MPI Rank 2: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1484s; samplesPerSecond = 4311.5 -MPI Rank 2: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 31- 40, 12.50%]: CrossEntropyWithSoftmax = 3.73643568 * 640; EvalClassificationError = 0.84531250 * 640; time = 0.1350s; samplesPerSecond = 4742.0 -MPI Rank 2: 08/16/2016 03:04:17: Epoch[ 1 of 4]-Minibatch[ 41- 50, 15.63%]: CrossEntropyWithSoftmax = 3.83079081 * 640; EvalClassificationError = 0.88281250 * 640; time = 0.1463s; samplesPerSecond = 4374.1 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 51- 60, 18.75%]: CrossEntropyWithSoftmax = 3.71437689 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.1339s; samplesPerSecond = 4778.5 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 61- 70, 21.88%]: CrossEntropyWithSoftmax = 3.42186230 * 640; EvalClassificationError = 0.79062500 * 640; time = 0.1146s; samplesPerSecond = 5587.0 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 71- 80, 25.00%]: CrossEntropyWithSoftmax = 3.53658053 * 640; EvalClassificationError = 0.82031250 * 640; time = 0.1335s; samplesPerSecond = 4795.3 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 81- 90, 28.13%]: CrossEntropyWithSoftmax = 3.49758017 * 640; EvalClassificationError = 0.81718750 * 640; time = 0.1422s; samplesPerSecond = 4502.1 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 91- 100, 31.25%]: CrossEntropyWithSoftmax = 3.39996308 * 640; EvalClassificationError = 0.80468750 * 640; time = 0.1471s; samplesPerSecond = 4351.7 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 101- 110, 34.38%]: CrossEntropyWithSoftmax = 3.49445773 * 640; EvalClassificationError = 0.82500000 * 640; time = 0.1241s; samplesPerSecond = 5158.6 -MPI Rank 2: 08/16/2016 03:04:18: Epoch[ 1 of 4]-Minibatch[ 111- 120, 37.50%]: CrossEntropyWithSoftmax = 3.26676999 * 640; EvalClassificationError = 0.79218750 * 640; time = 0.1253s; samplesPerSecond = 5108.5 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 121- 130, 40.63%]: CrossEntropyWithSoftmax = 3.18870173 * 640; EvalClassificationError = 0.78906250 * 640; time = 0.1444s; samplesPerSecond = 4432.0 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 131- 140, 43.75%]: CrossEntropyWithSoftmax = 3.05687264 * 640; EvalClassificationError = 0.74687500 * 640; time = 0.1374s; samplesPerSecond = 4658.1 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 141- 150, 46.88%]: CrossEntropyWithSoftmax = 2.95594569 * 640; EvalClassificationError = 0.71875000 * 640; time = 0.1264s; samplesPerSecond = 5063.4 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 151- 160, 50.00%]: CrossEntropyWithSoftmax = 3.10219604 * 640; EvalClassificationError = 0.74062500 * 640; time = 0.1165s; samplesPerSecond = 5492.4 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 161- 170, 53.13%]: CrossEntropyWithSoftmax = 2.80745014 * 640; EvalClassificationError = 0.70625000 * 640; time = 0.1207s; samplesPerSecond = 5301.6 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 171- 180, 56.25%]: CrossEntropyWithSoftmax = 2.72061842 * 640; EvalClassificationError = 0.65468750 * 640; time = 0.1152s; samplesPerSecond = 5553.6 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 181- 190, 59.38%]: CrossEntropyWithSoftmax = 2.80425747 * 640; EvalClassificationError = 0.71718750 * 640; time = 0.1283s; samplesPerSecond = 4987.5 -MPI Rank 2: 08/16/2016 03:04:19: Epoch[ 1 of 4]-Minibatch[ 191- 200, 62.50%]: CrossEntropyWithSoftmax = 2.71253068 * 640; EvalClassificationError = 0.67812500 * 640; time = 0.1054s; samplesPerSecond = 6073.0 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 201- 210, 65.63%]: CrossEntropyWithSoftmax = 2.59360399 * 640; EvalClassificationError = 0.66093750 * 640; time = 0.1321s; samplesPerSecond = 4844.3 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 211- 220, 68.75%]: CrossEntropyWithSoftmax = 2.60386649 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1202s; samplesPerSecond = 5323.7 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 221- 230, 71.88%]: CrossEntropyWithSoftmax = 2.53706678 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1285s; samplesPerSecond = 4979.8 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 231- 240, 75.00%]: CrossEntropyWithSoftmax = 2.56177343 * 640; EvalClassificationError = 0.65625000 * 640; time = 0.1410s; samplesPerSecond = 4538.1 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 241- 250, 78.13%]: CrossEntropyWithSoftmax = 2.50118791 * 640; EvalClassificationError = 0.64218750 * 640; time = 0.1448s; samplesPerSecond = 4419.4 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 251- 260, 81.25%]: CrossEntropyWithSoftmax = 2.40119788 * 640; EvalClassificationError = 0.62500000 * 640; time = 0.1214s; samplesPerSecond = 5272.9 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 261- 270, 84.38%]: CrossEntropyWithSoftmax = 2.27491503 * 640; EvalClassificationError = 0.58906250 * 640; time = 0.1147s; samplesPerSecond = 5581.4 -MPI Rank 2: 08/16/2016 03:04:20: Epoch[ 1 of 4]-Minibatch[ 271- 280, 87.50%]: CrossEntropyWithSoftmax = 2.51724208 * 640; EvalClassificationError = 0.65781250 * 640; time = 0.1382s; samplesPerSecond = 4631.8 -MPI Rank 2: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 281- 290, 90.63%]: CrossEntropyWithSoftmax = 2.27797542 * 640; EvalClassificationError = 0.59687500 * 640; time = 0.1412s; samplesPerSecond = 4533.4 -MPI Rank 2: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 291- 300, 93.75%]: CrossEntropyWithSoftmax = 2.26017740 * 640; EvalClassificationError = 0.60937500 * 640; time = 0.1208s; samplesPerSecond = 5299.7 -MPI Rank 2: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 301- 310, 96.88%]: CrossEntropyWithSoftmax = 2.24735342 * 640; EvalClassificationError = 0.58437500 * 640; time = 0.1228s; samplesPerSecond = 5213.8 -MPI Rank 2: 08/16/2016 03:04:21: Epoch[ 1 of 4]-Minibatch[ 311- 320, 100.00%]: CrossEntropyWithSoftmax = 2.23665382 * 640; EvalClassificationError = 0.60625000 * 640; time = 0.1270s; samplesPerSecond = 5041.1 -MPI Rank 2: 08/16/2016 03:04:21: Finished Epoch[ 1 of 4]: [Training] CrossEntropyWithSoftmax = 3.03815141 * 20480; EvalClassificationError = 0.73432617 * 20480; totalSamplesSeen = 20480; learningRatePerSample = 0.015625; epochTime=4.18004s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples -MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Actual gradient aggregation time: 0.03301 -MPI Rank 2: Async gradient aggregation wait time: 0.004502 -MPI Rank 2: Actual gradient aggregation time: 0.025447 -MPI Rank 2: 08/16/2016 03:04:21: Epoch[ 2 of 4]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.18586881 * 2304; EvalClassificationError = 0.58029514 * 2304; time = 0.2549s; samplesPerSecond = 9039.8 -MPI Rank 2: Async gradient aggregation wait time: 0.000896 -MPI Rank 2: Actual gradient aggregation time: 0.022431 -MPI Rank 2: Async gradient aggregation wait time: 0.010162 -MPI Rank 2: Actual gradient aggregation time: 0.027066 -MPI Rank 2: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.21453123 * 2560; EvalClassificationError = 0.59101563 * 2560; time = 0.2272s; samplesPerSecond = 11267.0 -MPI Rank 2: Async gradient aggregation wait time: 0.015988 -MPI Rank 2: Actual gradient aggregation time: 0.023278 -MPI Rank 2: Async gradient aggregation wait time: 0.017319 -MPI Rank 2: Actual gradient aggregation time: 0.023129 -MPI Rank 2: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.23428937 * 2560; EvalClassificationError = 0.59843750 * 2560; time = 0.2441s; samplesPerSecond = 10486.2 -MPI Rank 2: Async gradient aggregation wait time: 0.016112 -MPI Rank 2: Actual gradient aggregation time: 0.023574 -MPI Rank 2: Async gradient aggregation wait time: 0.009482 -MPI Rank 2: Actual gradient aggregation time: 0.024348 -MPI Rank 2: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 31- 40, 50.00%]: CrossEntropyWithSoftmax = 2.22238576 * 2560; EvalClassificationError = 0.59609375 * 2560; time = 0.2431s; samplesPerSecond = 10531.5 -MPI Rank 2: Async gradient aggregation wait time: 0.014152 -MPI Rank 2: Actual gradient aggregation time: 0.023799 -MPI Rank 2: Async gradient aggregation wait time: 0.017039 -MPI Rank 2: Actual gradient aggregation time: 0.024632 -MPI Rank 2: 08/16/2016 03:04:22: Epoch[ 2 of 4]-Minibatch[ 41- 50, 62.50%]: CrossEntropyWithSoftmax = 2.17945944 * 2560; EvalClassificationError = 0.58632812 * 2560; time = 0.2405s; samplesPerSecond = 10645.6 -MPI Rank 2: Async gradient aggregation wait time: 0.013644 -MPI Rank 2: Actual gradient aggregation time: 0.023504 -MPI Rank 2: Async gradient aggregation wait time: 1e-006 -MPI Rank 2: Actual gradient aggregation time: 0.022575 -MPI Rank 2: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 51- 60, 75.00%]: CrossEntropyWithSoftmax = 2.13880131 * 2560; EvalClassificationError = 0.58164063 * 2560; time = 0.2355s; samplesPerSecond = 10868.4 -MPI Rank 2: Async gradient aggregation wait time: 0.012693 -MPI Rank 2: Actual gradient aggregation time: 0.02266 -MPI Rank 2: Async gradient aggregation wait time: 0.013298 -MPI Rank 2: Actual gradient aggregation time: 0.023268 -MPI Rank 2: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 61- 70, 87.50%]: CrossEntropyWithSoftmax = 2.12741612 * 2560; EvalClassificationError = 0.57031250 * 2560; time = 0.2457s; samplesPerSecond = 10417.7 -MPI Rank 2: Async gradient aggregation wait time: 0.016006 -MPI Rank 2: Actual gradient aggregation time: 0.024946 -MPI Rank 2: Async gradient aggregation wait time: 0.011367 -MPI Rank 2: Actual gradient aggregation time: 0.023964 -MPI Rank 2: 08/16/2016 03:04:23: Epoch[ 2 of 4]-Minibatch[ 71- 80, 100.00%]: CrossEntropyWithSoftmax = 2.09486743 * 2560; EvalClassificationError = 0.58242187 * 2560; time = 0.2421s; samplesPerSecond = 10576.1 -MPI Rank 2: Async gradient aggregation wait time: 0.008734 -MPI Rank 2: Actual gradient aggregation time: 0.010393 -MPI Rank 2: 08/16/2016 03:04:23: Finished Epoch[ 2 of 4]: [Training] CrossEntropyWithSoftmax = 2.17271296 * 20480; EvalClassificationError = 0.58520508 * 20480; totalSamplesSeen = 40960; learningRatePerSample = 0.001953125; epochTime=1.95924s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.001929 -MPI Rank 2: Actual gradient aggregation time: 0.069767 -MPI Rank 2: Async gradient aggregation wait time: 0.051731 -MPI Rank 2: Actual gradient aggregation time: 0.07264 -MPI Rank 2: 08/16/2016 03:04:24: Epoch[ 3 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 2.17281503 * 9216; EvalClassificationError = 0.55924479 * 9216; time = 0.7206s; samplesPerSecond = 12788.5 -MPI Rank 2: Async gradient aggregation wait time: 0.003921 -MPI Rank 2: Actual gradient aggregation time: 0.068698 -MPI Rank 2: Async gradient aggregation wait time: 0.047368 -MPI Rank 2: Actual gradient aggregation time: 0.053525 -MPI Rank 2: 08/16/2016 03:04:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 2.02446206 * 10240; EvalClassificationError = 0.55722656 * 10240; time = 0.6942s; samplesPerSecond = 14750.6 -MPI Rank 2: 08/16/2016 03:04:25: Finished Epoch[ 3 of 4]: [Training] CrossEntropyWithSoftmax = 2.09074709 * 20480; EvalClassificationError = 0.55820313 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.45892s -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples -MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. -MPI Rank 2: Async gradient aggregation wait time: 0.005577 -MPI Rank 2: Actual gradient aggregation time: 0.072623 -MPI Rank 2: Async gradient aggregation wait time: 0.000919 -MPI Rank 2: Actual gradient aggregation time: 0.069425 -MPI Rank 2: 08/16/2016 03:04:25: Epoch[ 4 of 4]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95451978 * 9216; EvalClassificationError = 0.52962240 * 9216; time = 0.7018s; samplesPerSecond = 13132.1 -MPI Rank 2: Async gradient aggregation wait time: 0.049684 -MPI Rank 2: Actual gradient aggregation time: 0.069187 -MPI Rank 2: Async gradient aggregation wait time: 0.031937 -MPI Rank 2: Actual gradient aggregation time: 0.068147 -MPI Rank 2: 08/16/2016 03:04:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95218466 * 10240; EvalClassificationError = 0.52802734 * 10240; time = 0.7017s; samplesPerSecond = 14592.8 -MPI Rank 2: Async gradient aggregation wait time: 0.009288 -MPI Rank 2: 08/16/2016 03:04:26: Finished Epoch[ 4 of 4]: [Training] CrossEntropyWithSoftmax = 1.95485031 * 20480; EvalClassificationError = 0.52915039 * 20480; totalSamplesSeen = 81920; learningRatePerSample = 9.7656251e-005; epochTime=1.42715s -MPI Rank 2: 08/16/2016 03:04:26: CNTKCommandTrainEnd: speechTrain -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:26: Action "train" complete. -MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:26: __COMPLETED__ -MPI Rank 2: ~MPIWrapper \ No newline at end of file diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test deleted file mode 100755 index fcc587a00..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/run-test +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -. $TEST_ROOT_DIR/run-test-common - -OriginalTestDir=../../../DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation -ConfigDir=$TEST_DIR/../../../DNN -LogFileName=stderr -Instances=3 -NumCPUThreads=$(threadsPerInstance $Instances) - -(cd $TEST_DIR/$OriginalTestDir && md5sum baseline*) | (cd $TEST_DIR && md5sum --status -c -) -if [ $? != 0 ]; then - echo Error: Baselines must match original test. Copy from $OriginalTestDir. - exit 1 -fi - -# cntkmpirun -cntkmpirun "-n $Instances" cntk.cntk "speechTrain=[reader=[readerType=HTKDeserializers]] numCPUThreads=$NumCPUThreads precision=double speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[gradientBits=64]]]] speechTrain=[SGD=[ParallelTrain=[DataParallelSGD=[useBufferedAsyncGradientAggregation=true]]]] speechTrain=[SGD=[ParallelTrain=[parallelizationStartEpoch=2]]] speechTrain=[SGD=[maxEpochs=4]] speechTrain=[SGD=[ParallelTrain=[syncPerfStats=5]]]" -ExitCode=$? -sed 's/^/MPI Rank 0: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank0 -sed 's/^/MPI Rank 1: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank1 -sed 's/^/MPI Rank 2: /' $TEST_RUN_DIR/"$LogFileName"_speechTrain.logrank2 -exit $ExitCode diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml deleted file mode 100644 index c541ed64b..000000000 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml +++ /dev/null @@ -1,39 +0,0 @@ -dataDir: ../../../Data -tags: - # - bvt-p ((build_sku == 'gpu') or (build_sku == '1bitsgd')) and ((flavor == 'release') if (os == 'windows') else ((flavor == 'debug') ^ (device == 'cpu'))) - - nightly-p ((build_sku == 'gpu') or (build_sku == '1bitsgd')) - -testCases: - Must train epochs in exactly same order and parameters for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting Epoch {{integer}} - - learning rate per sample = {{float}} - - momentum = {{float}} - - Epochs must be finished with expected results for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Finished Epoch[{{integer}} of {{integer}}] - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0%}} - - learningRatePerSample = {{float,tolerance=0%}} - - Per-minibatch training results must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Epoch[{{integer}} of {{integer}}]-Minibatch[{{integer}}-{{integer}} - - " * {{integer}}; " - - CrossEntropyWithSoftmax = {{float,tolerance=0%}} - - EvalClassificationError = {{float,tolerance=0%}} - - DataParallelSGD training parameters must match for each MPI Rank: - patterns: - - ^MPI Rank {{integer}} - - Starting minibatch loop - - DataParallelSGD training - - myRank = {{integer}} - - numNodes = 3 - - numGradientBits = 64 - - distributed reading is ENABLED - - BufferedAsyncGradientAggregation is ENABLED