diff --git a/Source/SGDLib/SGD.cpp b/Source/SGDLib/SGD.cpp index fdc6ef2a3..074b1ff3f 100644 --- a/Source/SGDLib/SGD.cpp +++ b/Source/SGDLib/SGD.cpp @@ -141,15 +141,15 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, else { LOGPRINTF(stderr, "Training criteria:\n"); - for (const auto& node : criterionNodes) - { - LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str()); - } - if (criterionNodes.empty()) - { - LOGPRINTF(stderr, "\t(none)\n"); - InvalidArgument("TrainOrAdaptModel: No criterion node was specified."); - } + for (const auto& node : criterionNodes) + { + LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str()); + } + if (criterionNodes.empty()) + { + LOGPRINTF(stderr, "\t(none)\n"); + InvalidArgument("TrainOrAdaptModel: No criterion node was specified."); + } } // determine evaluationNodes from GetEvalCriterionNodes(), ensuring each criterion is only logged once @@ -277,10 +277,10 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, { fprintf(stderr, "out of %d parameter tensors and %d nodes with gradient:\n\n", (int)learnableNodes.size(), (int)numNeedsGradient); - for (let nodeDescription : nodesToUpdateDescriptions) - { - LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str()); - } + for (let nodeDescription : nodesToUpdateDescriptions) + { + LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str()); + } } // one blank line before training progress log @@ -302,16 +302,20 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, for (int i = 0; i < m_numPrevLearnRates; i++) prevLearnRates[i] = -1.0; + m_prevChosenMinibatchSize = m_mbSize[startEpoch]; + + int currentNumGradientBits = 0; // this remembers the last #gradient bits we set for dataParallelSGD (init val 0 has no meaning, just keep compiler happy) if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD) { - InitDistGradAgg(evaluationNodes.size(), m_traceLevel); + currentNumGradientBits = m_numGradientBits[startEpoch]; // remember so that we can detect a change + InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel); } else if (GetParallelizationMethod() == ParallelizationMethod::modelAveragingSGD || GetParallelizationMethod() == ParallelizationMethod::blockMomentumSGD) { InitModelAggregationHandler(m_syncStatsTrace, net->GetDeviceId()); } - + // precompute mean and invStdDev nodes and save initial model // When no precompute, only save if we did not load the model from a // checkpoint but instead built it from a network description @@ -385,6 +389,14 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, m_mpi->WaitAll(); } + // (re-)initialize 1-bit SGD + if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD && + currentNumGradientBits != m_numGradientBits[i]) + { + currentNumGradientBits = m_numGradientBits[i]; + InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel); + } + Timer timer; timer.Start(); @@ -464,6 +476,8 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, criterionNodes, evaluationNodes, inputMatrices, learnableNodes, smoothedGradients, smoothedCounts, learningRateAdjustmentFactor); + if (m_traceLevel < 1 && chosenMinibatchSize != m_prevChosenMinibatchSize) + LOGPRINTF(stderr, "Minibatch size adapted to %d.\n", (int)chosenMinibatchSize); m_prevChosenMinibatchSize = chosenMinibatchSize; } else @@ -476,9 +490,11 @@ void SGD::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net, double momentumPerSample = GetMomentumPerSample(i /*BUGBUG workaround:*/, trainSetDataReader->GetNumParallelSequencesForFixingBPTTMode()); // time constant = number of samples after which a contribution has been reduced to e^-1 - double momentumAsTimeConstant = momentumPerSample == 0.0 ? 0.0 - : momentumPerSample >= 1.0 ? 0.0 - : -1.0 / log(momentumPerSample); + double momentumAsTimeConstant = momentumPerSample == 0.0 + ? 0.0 + : momentumPerSample >= 1.0 + ? 0.0 + : -1.0 / log(momentumPerSample); if (m_traceLevel > 0) { fprintf(stderr, "\n"); @@ -863,8 +879,8 @@ size_t SGD::TrainOneEpoch(ComputationNetworkPtr net, LOGPRINTF(stderr, "Starting minibatch loop"); if (useGradientAggregation) { - fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)", - (int) m_mpi->CurrentNodeRank(), (int) m_mpi->NumNodesInUse(), (int) m_numGradientBits); + fprintf(stderr, ", DataParallelSGD training (myRank = %d, numNodes = %d, numGradientBits = %d)", + (int) m_mpi->CurrentNodeRank(), (int) m_mpi->NumNodesInUse(), (int) m_numGradientBits[epochNumber]); if (m_bufferedAsyncGradientAggregation) fprintf(stderr, ", BufferedAsyncGradientAggregation is ENABLED"); @@ -1754,8 +1770,8 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Evaluating minibatchSizes %d..%d\n", (int)epochNumber + 1, (int)RoundToMultipleOf64(minMinibatchSize), (int)RoundToMultipleOf64(maxMinibatchSize)); - size_t lastTriedTrialMinibatchSize = 0; - EpochCriterion lastTriedTrialEpochCriterion(0); + size_t lastGoodMinibatchSize = 0; + EpochCriterion lastGoodEpochCriterion(0); for (float trialMinibatchSizeFloat = (float) minMinibatchSize; trialMinibatchSizeFloat <= maxMinibatchSize; trialMinibatchSizeFloat *= minibatchSizeTuningFactor) @@ -1786,15 +1802,15 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, // for the first iteration of the loop only, set baseCriterion // to the result we got from TrainOneMiniEpochAndReloadModel(). baseCriterion = epochCriterion; - lastTriedTrialMinibatchSize = trialMinibatchSize; - lastTriedTrialEpochCriterion = baseCriterion; + lastGoodMinibatchSize = trialMinibatchSize; + lastGoodEpochCriterion = baseCriterion; isFirstIteration = false; if (m_traceLevel > 0) { LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Computed baseCriterion %.8f for minibatchSize=%d\n", (int)epochNumber + 1, baseCriterion.Average(), (int)trialMinibatchSize); - } + } } else if (!epochCriterion.IsNan() && epochCriterion.Average() > (baseCriterion.Average() * (1.0 + (m_minibatchSearchCriterionErrorMargin / 100.0)))) @@ -1807,8 +1823,8 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, } else { - lastTriedTrialMinibatchSize = trialMinibatchSize; - lastTriedTrialEpochCriterion = epochCriterion; + lastGoodMinibatchSize = trialMinibatchSize; + lastGoodEpochCriterion = epochCriterion; if (m_traceLevel > 0 && trialMinibatchSizeFloat * minibatchSizeTuningFactor <= maxMinibatchSize) { LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Keep searching... epochCriterion = %.8f vs. baseCriterion = %.8f\n", @@ -1816,10 +1832,12 @@ size_t SGD::SearchForBestMinibatchSize(ComputationNetworkPtr net, } } } - LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. epochCriterion = %.8f vs baseCriterion = %.8f\n", - (int)epochNumber+1, (int) lastTriedTrialMinibatchSize, lastTriedTrialEpochCriterion.Average(), baseCriterion.Average()); - - return lastTriedTrialMinibatchSize; + if (m_traceLevel > 0) + { + LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. epochCriterion = %.8f vs baseCriterion = %.8f\n", + (int)epochNumber + 1, (int)lastGoodMinibatchSize, lastGoodEpochCriterion.Average(), baseCriterion.Average()); + } + return lastGoodMinibatchSize; } // run training over a small subset of an epoch, used by automatic LR and MB-size tuning @@ -1905,31 +1923,24 @@ void SGD::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net } template -void SGD::InitDistGradAgg(int numEvalNodes, int traceLevel) +void SGD::InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel) { - if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD) - { - if (m_distGradAgg == nullptr) - { + assert(GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD); + if (traceLevel > 0) + fprintf(stderr, "Initializing dataParallelSGD for %d-bit quantization.\n", numGradientBits); + #ifdef CNTK_PARALLEL_TRAINING_SUPPORT - m_distGradAgg = std::make_shared>(m_mpi, m_numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace); + m_distGradAgg = std::make_shared>(m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace); #else - if (m_numGradientBits != (8 * sizeof(ElemType))) - { - RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!"); - } - - m_distGradAgg = std::make_shared>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace); -#endif // !CNTK_PARALLEL_TRAINING_SUPPORT - } - - if (m_gradHeader == nullptr) - { - m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { - DistGradHeader::Destroy(ptr); - }); - } + if (numGradientBits != (8 * sizeof(ElemType))) + { + RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!"); } + + m_distGradAgg = std::make_shared>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace); +#endif // !CNTK_PARALLEL_TRAINING_SUPPORT + + m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { DistGradHeader::Destroy(ptr); }); } template @@ -2651,7 +2662,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType) // parallel training m_parallelizationMethod = ParallelizationMethod::none; - m_numGradientBits = 32; + m_numGradientBits = vector{8 * (int)sizeofElemType}; // means no quantization m_zeroThresholdFor1Bit = true; m_bufferedAsyncGradientAggregation = false; m_enableDistributedMBReading = false; @@ -2682,13 +2693,14 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType) if (configParallelTrain.Exists(L"DataParallelSGD")) { const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record())); - size_t defaultGradientBits = 8 * sizeofElemType; - m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits); + let defaultGradientBits = 8 * (int)sizeofElemType; + m_numGradientBits = configDataParallelSGD(L"gradientBits", ConfigRecordType::Array(intargvector(vector{defaultGradientBits}))); m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true); m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false); - if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) ) + for (size_t i = 0; i < m_numGradientBits.size(); i++) { - InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!"); + if (m_numGradientBits[i] < 1 || m_numGradientBits[i] > defaultGradientBits) + InvalidArgument("gradientBits values must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double."); } } if (configParallelTrain.Exists(L"ModelAveragingSGD")) diff --git a/Source/SGDLib/SGD.h b/Source/SGDLib/SGD.h index ee4de6796..97d298877 100644 --- a/Source/SGDLib/SGD.h +++ b/Source/SGDLib/SGD.h @@ -264,7 +264,7 @@ protected: int m_syncStatsTrace; // Data parallel SGD training parameters - int m_numGradientBits; + intargvector m_numGradientBits; bool m_bufferedAsyncGradientAggregation; bool m_zeroThresholdFor1Bit; @@ -470,7 +470,7 @@ protected: /*out*/ std::vector& epochEvalErrors, const std::string& prefixMsg = ""); - void InitDistGradAgg(int numEvalNodes, int traceLevel); + void InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel); void InitModelAggregationHandler(int traceLevel, DEVICEID_TYPE devID); public: // UpdateWeights() - actual weight update, implementing various update rules diff --git a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt index af717a5cf..2130754ae 100644 --- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt +++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt @@ -545,7 +545,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl 08/16/2016 10:01:28: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1 -08/16/2016 10:01:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 10:01:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 10:01:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50722371 * 250; EvalClassificationError = 0.14800000 * 250; time = 0.0397s; samplesPerSecond = 6295.5 08/16/2016 10:01:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.45786101 * 250; EvalClassificationError = 0.12800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9 08/16/2016 10:01:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.37902995 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0178s; samplesPerSecond = 14020.5 @@ -592,7 +592,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s 08/16/2016 10:01:29: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1 -08/16/2016 10:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 10:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 10:01:29: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18478506 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0216s; samplesPerSecond = 11585.3 08/16/2016 10:01:29: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.12741733 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0184s; samplesPerSecond = 13576.6 08/16/2016 10:01:29: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.17535235 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0235s; samplesPerSecond = 10656.9 diff --git a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt index 06fe33c5a..0870c5f14 100644 --- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt +++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt @@ -546,7 +546,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl 08/16/2016 10:01:33: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1 -08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 10:01:33: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.61550018 * 250; EvalClassificationError = 0.27600000 * 250; time = 0.0108s; samplesPerSecond = 23111.8 08/16/2016 10:01:33: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.59409242 * 250; EvalClassificationError = 0.28800000 * 250; time = 0.0094s; samplesPerSecond = 26612.7 08/16/2016 10:01:33: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.53884306 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0093s; samplesPerSecond = 26890.4 @@ -593,7 +593,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s 08/16/2016 10:01:33: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1 -08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 10:01:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18398525 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25685.8 08/16/2016 10:01:33: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.12825686 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0095s; samplesPerSecond = 26374.1 08/16/2016 10:01:33: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.17547006 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0095s; samplesPerSecond = 26318.6 diff --git a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt index dc4986c86..d3c049d72 100644 --- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt @@ -544,7 +544,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl 08/16/2016 03:19:48: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1 -08/16/2016 03:19:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 03:19:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 03:19:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50509082 * 250; EvalClassificationError = 0.14400000 * 250; time = 0.0250s; samplesPerSecond = 9991.2 08/16/2016 03:19:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.45891377 * 250; EvalClassificationError = 0.13200000 * 250; time = 0.0251s; samplesPerSecond = 9958.6 08/16/2016 03:19:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.38371187 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4 @@ -591,7 +591,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s 08/16/2016 03:19:49: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1 -08/16/2016 03:19:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 03:19:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 03:19:49: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18436522 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0246s; samplesPerSecond = 10145.7 08/16/2016 03:19:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.12821186 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0251s; samplesPerSecond = 9945.1 08/16/2016 03:19:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.17512306 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0248s; samplesPerSecond = 10084.3 diff --git a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt index 51f691a8f..057cb1219 100644 --- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt @@ -545,7 +545,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl 08/16/2016 03:19:55: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1 -08/16/2016 03:19:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 03:19:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 03:19:55: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.61550018 * 250; EvalClassificationError = 0.27600000 * 250; time = 0.0399s; samplesPerSecond = 6268.0 08/16/2016 03:19:55: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.59409242 * 250; EvalClassificationError = 0.28800000 * 250; time = 0.0380s; samplesPerSecond = 6577.0 08/16/2016 03:19:55: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.53884306 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0379s; samplesPerSecond = 6604.0 @@ -592,7 +592,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s 08/16/2016 03:19:56: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1 -08/16/2016 03:19:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). +08/16/2016 03:19:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1). 08/16/2016 03:19:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18398525 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0376s; samplesPerSecond = 6641.3 08/16/2016 03:19:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.12825686 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0376s; samplesPerSecond = 6653.0 08/16/2016 03:19:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.17547006 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0374s; samplesPerSecond = 6692.7 diff --git a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt index 94b7e27b8..417dfc4ab 100644 --- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt +++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt @@ -423,7 +423,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1 08/16/2016 10:01:43: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED. +08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED. 08/16/2016 10:01:44: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.45117986 * 2048; EvalClassificationError = 0.92187500 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.209966s 08/16/2016 10:01:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn' 08/16/2016 10:01:44: CNTKCommandTrainEnd: speechTrain diff --git a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt index 57a1008b7..3364ac68d 100644 --- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt +++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt @@ -424,7 +424,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1 08/16/2016 10:01:46: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED. +08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED. 08/16/2016 10:01:46: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalClassificationError = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.023072s 08/16/2016 10:01:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn' 08/16/2016 10:01:46: CNTKCommandTrainEnd: speechTrain diff --git a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt index 3d727f95b..7c8948128 100644 --- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt @@ -422,7 +422,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1 08/16/2016 03:20:15: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED. +08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED. 08/16/2016 03:20:15: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.46427900 * 2048; EvalClassificationError = 0.91259766 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.28059s 08/16/2016 03:20:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn' 08/16/2016 03:20:15: CNTKCommandTrainEnd: speechTrain diff --git a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt index 0dc5bf2bc..6230b9bd3 100644 --- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt @@ -423,7 +423,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1 08/16/2016 03:20:21: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses -08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED. +08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED. 08/16/2016 03:20:21: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalClassificationError = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.05551s 08/16/2016 03:20:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn' 08/16/2016 03:20:21: CNTKCommandTrainEnd: speechTrain diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt index cea41589b..1bd480f84 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt @@ -477,7 +477,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1177s; samplesPerSecond = 2124.5 MPI Rank 0: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0420s; samplesPerSecond = 5949.7 MPI Rank 0: 08/16/2016 09:57:09: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0598s; samplesPerSecond = 4180.3 @@ -523,7 +523,7 @@ MPI Rank 0: 08/16/2016 09:57:10: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.4 MPI Rank 0: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5186.5 MPI Rank 0: 08/16/2016 09:57:11: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5 @@ -569,7 +569,7 @@ MPI Rank 0: 08/16/2016 09:57:12: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.8 MPI Rank 0: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.4 MPI Rank 0: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10940.4 @@ -615,7 +615,7 @@ MPI Rank 0: 08/16/2016 09:57:14: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0885s; samplesPerSecond = 2825.7 MPI Rank 0: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12260.9 MPI Rank 0: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0417s; samplesPerSecond = 5988.2 @@ -1025,7 +1025,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1176s; samplesPerSecond = 2125.0 MPI Rank 1: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5982.7 MPI Rank 1: 08/16/2016 09:57:09: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0600s; samplesPerSecond = 4163.8 @@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6236.7 MPI Rank 1: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.4 MPI Rank 1: 08/16/2016 09:57:11: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5 @@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.2 MPI Rank 1: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.5 MPI Rank 1: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10939.0 @@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0886s; samplesPerSecond = 2823.0 MPI Rank 1: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12265.1 MPI Rank 1: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5987.6 @@ -1569,7 +1569,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1176s; samplesPerSecond = 2126.1 MPI Rank 2: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5983.7 MPI Rank 2: 08/16/2016 09:57:09: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0602s; samplesPerSecond = 4149.9 @@ -1614,7 +1614,7 @@ MPI Rank 2: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.5 MPI Rank 2: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.5 MPI Rank 2: 08/16/2016 09:57:11: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.4 @@ -1659,7 +1659,7 @@ MPI Rank 2: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.9 MPI Rank 2: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.4 MPI Rank 2: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10940.4 @@ -1704,7 +1704,7 @@ MPI Rank 2: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0885s; samplesPerSecond = 2823.4 MPI Rank 2: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12262.1 MPI Rank 2: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5988.0 @@ -2113,7 +2113,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1177s; samplesPerSecond = 2124.5 MPI Rank 3: 08/16/2016 09:57:08: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5982.9 MPI Rank 3: 08/16/2016 09:57:09: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0600s; samplesPerSecond = 4163.8 @@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.1 MPI Rank 3: 08/16/2016 09:57:10: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.6 MPI Rank 3: 08/16/2016 09:57:11: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5 @@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5676.9 MPI Rank 3: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4729.4 MPI Rank 3: 08/16/2016 09:57:12: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10939.5 @@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0886s; samplesPerSecond = 2823.1 MPI Rank 3: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12269.9 MPI Rank 3: 08/16/2016 09:57:14: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5987.9 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt index b91e936ec..e9aac9ffd 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt @@ -478,7 +478,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8248.6 MPI Rank 0: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.6 MPI Rank 0: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9367.9 @@ -524,7 +524,7 @@ MPI Rank 0: 08/16/2016 09:57:24: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10595.0 MPI Rank 0: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10573.5 MPI Rank 0: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10596.8 @@ -570,7 +570,7 @@ MPI Rank 0: 08/16/2016 09:57:25: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.5 MPI Rank 0: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10455.9 MPI Rank 0: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10531.2 @@ -616,7 +616,7 @@ MPI Rank 0: 08/16/2016 09:57:26: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10499.8 MPI Rank 0: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10549.4 MPI Rank 0: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10492.7 @@ -1027,7 +1027,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8241.3 MPI Rank 1: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.6 MPI Rank 1: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9365.8 @@ -1072,7 +1072,7 @@ MPI Rank 1: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10591.9 MPI Rank 1: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10580.7 MPI Rank 1: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10592.3 @@ -1117,7 +1117,7 @@ MPI Rank 1: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10468.1 MPI Rank 1: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10454.6 MPI Rank 1: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10534.3 @@ -1162,7 +1162,7 @@ MPI Rank 1: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10494.5 MPI Rank 1: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10551.6 MPI Rank 1: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10494.5 @@ -1572,7 +1572,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8252.7 MPI Rank 2: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.9 MPI Rank 2: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9361.5 @@ -1617,7 +1617,7 @@ MPI Rank 2: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10602.2 MPI Rank 2: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10581.6 MPI Rank 2: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10587.8 @@ -1662,7 +1662,7 @@ MPI Rank 2: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10475.6 MPI Rank 2: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10456.3 MPI Rank 2: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10532.5 @@ -1707,7 +1707,7 @@ MPI Rank 2: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10496.7 MPI Rank 2: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10554.3 MPI Rank 2: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10492.3 @@ -2117,7 +2117,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8238.9 MPI Rank 3: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9328.0 MPI Rank 3: 08/16/2016 09:57:23: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9366.5 @@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10595.0 MPI Rank 3: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4 MPI Rank 3: 08/16/2016 09:57:24: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10591.9 @@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10468.6 MPI Rank 3: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10456.8 MPI Rank 3: 08/16/2016 09:57:25: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10485.7 @@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10489.2 MPI Rank 3: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10552.5 MPI Rank 3: 08/16/2016 09:57:26: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt index 070cc276e..071b1154f 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt @@ -474,7 +474,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0154s; samplesPerSecond = 16240.1 MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0117s; samplesPerSecond = 21374.8 MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21211.6 @@ -520,7 +520,7 @@ MPI Rank 0: 08/16/2016 03:00:57: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21708.9 MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21413.3 MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21258.5 @@ -566,7 +566,7 @@ MPI Rank 0: 08/16/2016 03:00:57: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0103s; samplesPerSecond = 24267.1 MPI Rank 0: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23649.6 MPI Rank 0: 08/16/2016 03:00:58: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0104s; samplesPerSecond = 24047.7 @@ -612,7 +612,7 @@ MPI Rank 0: 08/16/2016 03:00:58: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23753.0 MPI Rank 0: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0104s; samplesPerSecond = 24149.9 MPI Rank 0: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24478.6 @@ -1023,7 +1023,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 15976.5 MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21485.0 MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0119s; samplesPerSecond = 21093.5 @@ -1068,7 +1068,7 @@ MPI Rank 1: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21699.5 MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21373.0 MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21139.9 @@ -1113,7 +1113,7 @@ MPI Rank 1: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0104s; samplesPerSecond = 24087.1 MPI Rank 1: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23672.0 MPI Rank 1: 08/16/2016 03:00:58: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0105s; samplesPerSecond = 23852.7 @@ -1158,7 +1158,7 @@ MPI Rank 1: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23764.3 MPI Rank 1: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0103s; samplesPerSecond = 24224.8 MPI Rank 1: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24457.1 @@ -1568,7 +1568,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 15999.0 MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0117s; samplesPerSecond = 21343.8 MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21235.0 @@ -1613,7 +1613,7 @@ MPI Rank 2: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21822.6 MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21393.1 MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21120.2 @@ -1658,7 +1658,7 @@ MPI Rank 2: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0104s; samplesPerSecond = 23987.7 MPI Rank 2: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23636.2 MPI Rank 2: 08/16/2016 03:00:58: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0104s; samplesPerSecond = 24038.5 @@ -1703,7 +1703,7 @@ MPI Rank 2: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23882.3 MPI Rank 2: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0103s; samplesPerSecond = 24227.2 MPI Rank 2: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24457.1 @@ -2113,7 +2113,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 16068.9 MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21475.8 MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21148.8 @@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21716.5 MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21389.5 MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21303.8 @@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0103s; samplesPerSecond = 24255.4 MPI Rank 3: 08/16/2016 03:00:57: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23683.2 MPI Rank 3: 08/16/2016 03:00:58: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0105s; samplesPerSecond = 23900.6 @@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0104s; samplesPerSecond = 23971.6 MPI Rank 3: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0104s; samplesPerSecond = 24096.4 MPI Rank 3: 08/16/2016 03:00:58: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24471.4 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt index 143d038e0..26e2bd3e9 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt @@ -475,7 +475,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0776s; samplesPerSecond = 3220.8 MPI Rank 0: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0484s; samplesPerSecond = 5166.8 MPI Rank 0: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0483s; samplesPerSecond = 5176.1 @@ -521,7 +521,7 @@ MPI Rank 0: 08/16/2016 03:01:15: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0460s; samplesPerSecond = 5438.9 MPI Rank 0: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0451s; samplesPerSecond = 5547.3 MPI Rank 0: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0449s; samplesPerSecond = 5566.2 @@ -567,7 +567,7 @@ MPI Rank 0: 08/16/2016 03:01:17: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0445s; samplesPerSecond = 5620.9 MPI Rank 0: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5668.3 MPI Rank 0: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5630.6 @@ -613,7 +613,7 @@ MPI Rank 0: 08/16/2016 03:01:18: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:18: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0437s; samplesPerSecond = 5715.9 MPI Rank 0: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0443s; samplesPerSecond = 5641.8 MPI Rank 0: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5627.5 @@ -1025,7 +1025,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0775s; samplesPerSecond = 3223.9 MPI Rank 1: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0484s; samplesPerSecond = 5167.1 MPI Rank 1: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0485s; samplesPerSecond = 5157.7 @@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0459s; samplesPerSecond = 5449.4 MPI Rank 1: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0452s; samplesPerSecond = 5529.1 MPI Rank 1: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0449s; samplesPerSecond = 5571.8 @@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0444s; samplesPerSecond = 5633.8 MPI Rank 1: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0443s; samplesPerSecond = 5646.9 MPI Rank 1: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0442s; samplesPerSecond = 5653.7 @@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:18: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0439s; samplesPerSecond = 5701.0 MPI Rank 1: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5671.1 MPI Rank 1: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5624.4 @@ -1571,7 +1571,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0775s; samplesPerSecond = 3225.3 MPI Rank 2: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0485s; samplesPerSecond = 5150.4 MPI Rank 2: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0484s; samplesPerSecond = 5163.0 @@ -1616,7 +1616,7 @@ MPI Rank 2: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0458s; samplesPerSecond = 5455.7 MPI Rank 2: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0453s; samplesPerSecond = 5514.5 MPI Rank 2: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0447s; samplesPerSecond = 5587.2 @@ -1661,7 +1661,7 @@ MPI Rank 2: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0445s; samplesPerSecond = 5622.7 MPI Rank 2: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5672.9 MPI Rank 2: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0445s; samplesPerSecond = 5622.4 @@ -1706,7 +1706,7 @@ MPI Rank 2: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:18: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0437s; samplesPerSecond = 5716.1 MPI Rank 2: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.2 MPI Rank 2: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0443s; samplesPerSecond = 5646.7 @@ -2117,7 +2117,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3252.2 MPI Rank 3: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0483s; samplesPerSecond = 5174.1 MPI Rank 3: 08/16/2016 03:01:13: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0485s; samplesPerSecond = 5155.8 @@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0459s; samplesPerSecond = 5441.9 MPI Rank 3: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0447s; samplesPerSecond = 5588.3 MPI Rank 3: 08/16/2016 03:01:15: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0447s; samplesPerSecond = 5598.4 @@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0439s; samplesPerSecond = 5698.5 MPI Rank 3: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0447s; samplesPerSecond = 5587.8 MPI Rank 3: 08/16/2016 03:01:17: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0438s; samplesPerSecond = 5712.8 @@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:18: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0433s; samplesPerSecond = 5776.6 MPI Rank 3: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4 MPI Rank 3: 08/16/2016 03:01:19: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0450s; samplesPerSecond = 5553.0 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml index f4b822025..365455d74 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml @@ -35,6 +35,6 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 4 - - NumGradientBits = 64 + - myRank = {{integer}} + - numNodes = 4 + - numGradientBits = 64 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt index 41a8ece30..3e709fddd 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt @@ -477,7 +477,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1682s; samplesPerSecond = 1485.9 MPI Rank 0: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5569.9 MPI Rank 0: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4 @@ -523,7 +523,7 @@ MPI Rank 0: 08/16/2016 09:57:33: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.3 MPI Rank 0: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.2 MPI Rank 0: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.0 @@ -569,7 +569,7 @@ MPI Rank 0: 08/16/2016 09:57:35: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.4 MPI Rank 0: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.1 MPI Rank 0: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4860.2 @@ -615,7 +615,7 @@ MPI Rank 0: 08/16/2016 09:57:37: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5476.1 MPI Rank 0: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5374.8 MPI Rank 0: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9 @@ -1025,7 +1025,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1683s; samplesPerSecond = 1485.7 MPI Rank 1: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5571.0 MPI Rank 1: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4 @@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3823.9 MPI Rank 1: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.5 MPI Rank 1: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.1 @@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.0 MPI Rank 1: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5634.6 MPI Rank 1: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4859.8 @@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5474.7 MPI Rank 1: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5375.0 MPI Rank 1: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.7 @@ -1569,7 +1569,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1682s; samplesPerSecond = 1486.1 MPI Rank 2: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5570.0 MPI Rank 2: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5658.7 @@ -1614,7 +1614,7 @@ MPI Rank 2: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.5 MPI Rank 2: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.0 MPI Rank 2: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.3 @@ -1659,7 +1659,7 @@ MPI Rank 2: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.4 MPI Rank 2: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5634.9 MPI Rank 2: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0515s; samplesPerSecond = 4851.7 @@ -1704,7 +1704,7 @@ MPI Rank 2: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5475.9 MPI Rank 2: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5375.1 MPI Rank 2: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9 @@ -2113,7 +2113,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1683s; samplesPerSecond = 1485.8 MPI Rank 3: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5571.0 MPI Rank 3: 08/16/2016 09:57:31: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.2 @@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.1 MPI Rank 3: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.3 MPI Rank 3: 08/16/2016 09:57:33: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.1 @@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.2 MPI Rank 3: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.1 MPI Rank 3: 08/16/2016 09:57:35: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4859.2 @@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5474.5 MPI Rank 3: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5374.7 MPI Rank 3: 08/16/2016 09:57:37: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt index 672960cc6..af88d0c04 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt @@ -478,7 +478,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11152.3 MPI Rank 0: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13162.7 MPI Rank 0: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13013.4 @@ -524,7 +524,7 @@ MPI Rank 0: 08/16/2016 09:57:47: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12582.4 MPI Rank 0: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13160.0 MPI Rank 0: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13169.7 @@ -570,7 +570,7 @@ MPI Rank 0: 08/16/2016 09:57:48: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 13007.3 MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13097.2 MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13152.4 @@ -616,7 +616,7 @@ MPI Rank 0: 08/16/2016 09:57:48: SGD: Saving checkpoint model '/tmp/cntk-test-20 MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13088.3 MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13015.4 MPI Rank 0: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13072.6 @@ -1027,7 +1027,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11167.7 MPI Rank 1: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13152.4 MPI Rank 1: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3 @@ -1072,7 +1072,7 @@ MPI Rank 1: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12576.7 MPI Rank 1: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13170.4 MPI Rank 1: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13166.9 @@ -1117,7 +1117,7 @@ MPI Rank 1: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 12997.8 MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13088.3 MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13155.8 @@ -1162,7 +1162,7 @@ MPI Rank 1: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13087.6 MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13015.4 MPI Rank 1: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13065.7 @@ -1572,7 +1572,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11148.3 MPI Rank 2: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13153.7 MPI Rank 2: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3 @@ -1617,7 +1617,7 @@ MPI Rank 2: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12587.5 MPI Rank 2: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13157.2 MPI Rank 2: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13166.2 @@ -1662,7 +1662,7 @@ MPI Rank 2: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 13010.7 MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13091.1 MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13153.7 @@ -1707,7 +1707,7 @@ MPI Rank 2: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13085.6 MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13024.9 MPI Rank 2: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13062.3 @@ -2117,7 +2117,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11158.7 MPI Rank 3: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13155.1 MPI Rank 3: 08/16/2016 09:57:46: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3 @@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12581.8 MPI Rank 3: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13160.0 MPI Rank 3: 08/16/2016 09:57:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13168.3 @@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 12998.5 MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13089.0 MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13149.6 @@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13078.7 MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13024.2 MPI Rank 3: 08/16/2016 09:57:48: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13061.7 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt index 70a4a78fd..6e63cf172 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt @@ -474,7 +474,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0147s; samplesPerSecond = 16956.0 MPI Rank 0: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21626.3 MPI Rank 0: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0108s; samplesPerSecond = 23062.7 @@ -520,7 +520,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0107s; samplesPerSecond = 23286.1 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23126.7 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0104s; samplesPerSecond = 23967.0 @@ -566,7 +566,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25858.5 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25403.9 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0097s; samplesPerSecond = 25677.9 @@ -612,7 +612,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 24985.0 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25646.3 MPI Rank 0: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0095s; samplesPerSecond = 26246.7 @@ -1023,7 +1023,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0144s; samplesPerSecond = 17364.7 MPI Rank 1: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21602.0 MPI Rank 1: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0109s; samplesPerSecond = 22994.8 @@ -1068,7 +1068,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23249.3 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23067.0 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0105s; samplesPerSecond = 23859.5 @@ -1113,7 +1113,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0096s; samplesPerSecond = 25930.9 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0099s; samplesPerSecond = 25148.4 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0098s; samplesPerSecond = 25544.1 @@ -1158,7 +1158,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 25108.0 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25510.2 MPI Rank 1: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26126.0 @@ -1568,7 +1568,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0147s; samplesPerSecond = 16960.7 MPI Rank 2: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21613.2 MPI Rank 2: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0108s; samplesPerSecond = 23058.5 @@ -1613,7 +1613,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23245.0 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23075.5 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0105s; samplesPerSecond = 23818.6 @@ -1658,7 +1658,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25823.8 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0102s; samplesPerSecond = 24616.0 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0097s; samplesPerSecond = 25654.2 @@ -1703,7 +1703,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 25110.5 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25434.9 MPI Rank 2: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26087.9 @@ -2113,7 +2113,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0146s; samplesPerSecond = 17070.7 MPI Rank 3: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21603.9 MPI Rank 3: 08/16/2016 03:01:29: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0109s; samplesPerSecond = 22958.9 @@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0107s; samplesPerSecond = 23292.6 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0109s; samplesPerSecond = 23033.0 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0104s; samplesPerSecond = 23992.3 @@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25767.9 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0099s; samplesPerSecond = 25255.1 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0098s; samplesPerSecond = 25578.1 @@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 24982.5 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25683.2 MPI Rank 3: 08/16/2016 03:01:30: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26158.8 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt index 54905b2ef..499c8d04b 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt @@ -475,7 +475,7 @@ MPI Rank 0: MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0501s; samplesPerSecond = 4991.1 MPI Rank 0: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0441s; samplesPerSecond = 5669.3 MPI Rank 0: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5717.2 @@ -521,7 +521,7 @@ MPI Rank 0: 08/16/2016 03:01:47: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0418s; samplesPerSecond = 5977.9 MPI Rank 0: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0418s; samplesPerSecond = 5974.1 MPI Rank 0: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6034.7 @@ -567,7 +567,7 @@ MPI Rank 0: 08/16/2016 03:01:49: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0408s; samplesPerSecond = 6125.3 MPI Rank 0: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6134.7 MPI Rank 0: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6134.1 @@ -613,7 +613,7 @@ MPI Rank 0: 08/16/2016 03:01:50: SGD: Saving checkpoint model 'C:\Users\svcphil\ MPI Rank 0: MPI Rank 0: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 0: -MPI Rank 0: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0415s; samplesPerSecond = 6017.7 MPI Rank 0: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0411s; samplesPerSecond = 6089.4 MPI Rank 0: 08/16/2016 03:01:51: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0411s; samplesPerSecond = 6081.8 @@ -1025,7 +1025,7 @@ MPI Rank 1: MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0500s; samplesPerSecond = 4999.0 MPI Rank 1: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0441s; samplesPerSecond = 5668.0 MPI Rank 1: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0438s; samplesPerSecond = 5714.3 @@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0419s; samplesPerSecond = 5968.3 MPI Rank 1: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0417s; samplesPerSecond = 5993.5 MPI Rank 1: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6033.4 @@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0409s; samplesPerSecond = 6107.4 MPI Rank 1: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0407s; samplesPerSecond = 6135.7 MPI Rank 1: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6131.4 @@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 1: MPI Rank 1: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 1: -MPI Rank 1: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0417s; samplesPerSecond = 5996.9 MPI Rank 1: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6099.5 MPI Rank 1: 08/16/2016 03:01:51: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0409s; samplesPerSecond = 6108.3 @@ -1571,7 +1571,7 @@ MPI Rank 2: MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0500s; samplesPerSecond = 5001.6 MPI Rank 2: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0442s; samplesPerSecond = 5651.4 MPI Rank 2: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5716.9 @@ -1616,7 +1616,7 @@ MPI Rank 2: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0418s; samplesPerSecond = 5980.7 MPI Rank 2: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0419s; samplesPerSecond = 5970.4 MPI Rank 2: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6032.8 @@ -1661,7 +1661,7 @@ MPI Rank 2: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0410s; samplesPerSecond = 6099.9 MPI Rank 2: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6128.8 MPI Rank 2: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6131.7 @@ -1706,7 +1706,7 @@ MPI Rank 2: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 2: MPI Rank 2: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 2: -MPI Rank 2: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0416s; samplesPerSecond = 6010.9 MPI Rank 2: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6100.5 MPI Rank 2: 08/16/2016 03:01:51: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0411s; samplesPerSecond = 6076.1 @@ -2117,7 +2117,7 @@ MPI Rank 3: MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0501s; samplesPerSecond = 4987.8 MPI Rank 3: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0440s; samplesPerSecond = 5676.8 MPI Rank 3: 08/16/2016 03:01:45: Epoch[ 1 of 4]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5717.7 @@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0419s; samplesPerSecond = 5970.3 MPI Rank 3: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0418s; samplesPerSecond = 5987.2 MPI Rank 3: 08/16/2016 03:01:47: Epoch[ 2 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0415s; samplesPerSecond = 6031.1 @@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0409s; samplesPerSecond = 6107.7 MPI Rank 3: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6130.2 MPI Rank 3: 08/16/2016 03:01:49: Epoch[ 3 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6130.9 @@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop MPI Rank 3: MPI Rank 3: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples MPI Rank 3: -MPI Rank 3: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 3: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 3: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0417s; samplesPerSecond = 6001.1 MPI Rank 3: 08/16/2016 03:01:50: Epoch[ 4 of 4]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6097.1 MPI Rank 3: 08/16/2016 03:01:51: Epoch[ 4 of 4]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0410s; samplesPerSecond = 6094.4 diff --git a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml index dd5e1b19e..e9b6ea400 100644 --- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml +++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml @@ -35,6 +35,6 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 4 - - NumGradientBits = 32 + - myRank = {{integer}} + - numNodes = 4 + - numGradientBits = 32 diff --git a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt index 3156462a0..ee8512a12 100644 --- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt @@ -549,7 +549,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:34: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:35: Epoch[ 2 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.95183370 * 2560; err = 0.88007813 * 2560; time = 0.9133s; samplesPerSecond = 2803.0 MPI Rank 0: 08/16/2016 09:57:36: Epoch[ 2 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.87879531 * 2560; err = 0.87578125 * 2560; time = 0.8783s; samplesPerSecond = 2914.9 MPI Rank 0: 08/16/2016 09:57:37: Epoch[ 2 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.85396065 * 2560; err = 0.87578125 * 2560; time = 0.8777s; samplesPerSecond = 2916.6 @@ -565,7 +565,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:42: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:42: Epoch[ 3 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.37945068 * 2560; err = 0.79882812 * 2560; time = 0.8417s; samplesPerSecond = 3041.5 MPI Rank 0: 08/16/2016 09:57:43: Epoch[ 3 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.29694288 * 2560; err = 0.79570312 * 2560; time = 0.8678s; samplesPerSecond = 2950.0 MPI Rank 0: 08/16/2016 09:57:44: Epoch[ 3 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.30035303 * 2560; err = 0.80468750 * 2560; time = 0.8295s; samplesPerSecond = 3086.3 @@ -580,7 +580,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:49: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8373s; samplesPerSecond = 3057.6 MPI Rank 0: 08/16/2016 09:57:50: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8488s; samplesPerSecond = 3016.1 MPI Rank 0: 08/16/2016 09:57:51: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8374s; samplesPerSecond = 3057.2 @@ -596,7 +596,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:55: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:56: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.8067s; samplesPerSecond = 3173.4 MPI Rank 0: 08/16/2016 09:57:57: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8110s; samplesPerSecond = 3156.7 MPI Rank 0: 08/16/2016 09:57:57: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7916s; samplesPerSecond = 3234.0 @@ -1101,7 +1101,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:34: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:35: Epoch[ 2 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.95183370 * 2560; err = 0.88007813 * 2560; time = 0.9135s; samplesPerSecond = 2802.3 MPI Rank 1: 08/16/2016 09:57:36: Epoch[ 2 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.87879531 * 2560; err = 0.87578125 * 2560; time = 0.8781s; samplesPerSecond = 2915.3 MPI Rank 1: 08/16/2016 09:57:37: Epoch[ 2 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.85396065 * 2560; err = 0.87578125 * 2560; time = 0.8778s; samplesPerSecond = 2916.4 @@ -1116,7 +1116,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:42: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:42: Epoch[ 3 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.37945068 * 2560; err = 0.79882812 * 2560; time = 0.8417s; samplesPerSecond = 3041.5 MPI Rank 1: 08/16/2016 09:57:43: Epoch[ 3 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.29694288 * 2560; err = 0.79570312 * 2560; time = 0.8680s; samplesPerSecond = 2949.2 MPI Rank 1: 08/16/2016 09:57:44: Epoch[ 3 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.30035303 * 2560; err = 0.80468750 * 2560; time = 0.8294s; samplesPerSecond = 3086.6 @@ -1130,7 +1130,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:49: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8374s; samplesPerSecond = 3057.1 MPI Rank 1: 08/16/2016 09:57:50: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8488s; samplesPerSecond = 3016.0 MPI Rank 1: 08/16/2016 09:57:51: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8375s; samplesPerSecond = 3056.6 @@ -1145,7 +1145,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:55: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:56: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.8070s; samplesPerSecond = 3172.1 MPI Rank 1: 08/16/2016 09:57:57: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8108s; samplesPerSecond = 3157.6 MPI Rank 1: 08/16/2016 09:57:57: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7916s; samplesPerSecond = 3234.0 @@ -1667,7 +1667,7 @@ MPI Rank 0: 08/16/2016 09:58:04: Starting Epoch 4: learning rate per sample = 0. MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:58:05: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8707s; samplesPerSecond = 2940.0 MPI Rank 0: 08/16/2016 09:58:06: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8271s; samplesPerSecond = 3095.2 MPI Rank 0: 08/16/2016 09:58:07: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8286s; samplesPerSecond = 3089.5 @@ -1683,7 +1683,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:11: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:58:12: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.7991s; samplesPerSecond = 3203.6 MPI Rank 0: 08/16/2016 09:58:12: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8107s; samplesPerSecond = 3157.8 MPI Rank 0: 08/16/2016 09:58:13: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7913s; samplesPerSecond = 3235.3 @@ -2147,7 +2147,7 @@ MPI Rank 1: 08/16/2016 09:58:04: Starting Epoch 4: learning rate per sample = 0. MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:58:05: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.9453s; samplesPerSecond = 2708.2 MPI Rank 1: 08/16/2016 09:58:06: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8269s; samplesPerSecond = 3095.9 MPI Rank 1: 08/16/2016 09:58:07: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8288s; samplesPerSecond = 3088.9 @@ -2162,7 +2162,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:11: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:58:12: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.7994s; samplesPerSecond = 3202.2 MPI Rank 1: 08/16/2016 09:58:12: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8101s; samplesPerSecond = 3160.0 MPI Rank 1: 08/16/2016 09:58:13: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7917s; samplesPerSecond = 3233.4 diff --git a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt index 8658eaa4c..fc01709df 100644 --- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt @@ -553,7 +553,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:24: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 4 MPI Rank 0: (GPU): creating curand object with seed 5 MPI Rank 0: (GPU): creating curand object with seed 6 @@ -573,7 +573,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:24: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 8 MPI Rank 0: (GPU): creating curand object with seed 9 MPI Rank 0: (GPU): creating curand object with seed 10 @@ -592,7 +592,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:25: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 12 MPI Rank 0: (GPU): creating curand object with seed 13 MPI Rank 0: (GPU): creating curand object with seed 14 @@ -612,7 +612,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:26: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0643s; samplesPerSecond = 39801.6 MPI Rank 0: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0622s; samplesPerSecond = 41125.2 MPI Rank 0: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41475.0 @@ -1121,7 +1121,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:24: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 24 MPI Rank 1: (GPU): creating curand object with seed 25 MPI Rank 1: (GPU): creating curand object with seed 26 @@ -1140,7 +1140,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:24: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 28 MPI Rank 1: (GPU): creating curand object with seed 29 MPI Rank 1: (GPU): creating curand object with seed 30 @@ -1158,7 +1158,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:25: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 32 MPI Rank 1: (GPU): creating curand object with seed 33 MPI Rank 1: (GPU): creating curand object with seed 34 @@ -1177,7 +1177,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:26: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0646s; samplesPerSecond = 39645.1 MPI Rank 1: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0624s; samplesPerSecond = 41004.0 MPI Rank 1: 08/16/2016 09:58:26: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41497.1 @@ -1699,7 +1699,7 @@ MPI Rank 0: 08/16/2016 09:58:31: Starting Epoch 4: learning rate per sample = 0. MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 12 MPI Rank 0: (GPU): creating curand object with seed 13 MPI Rank 0: (GPU): creating curand object with seed 14 @@ -1719,7 +1719,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:32: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0631s; samplesPerSecond = 40566.7 MPI Rank 0: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0615s; samplesPerSecond = 41624.0 MPI Rank 0: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41518.7 @@ -2183,7 +2183,7 @@ MPI Rank 1: 08/16/2016 09:58:31: Starting Epoch 4: learning rate per sample = 0. MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 32 MPI Rank 1: (GPU): creating curand object with seed 33 MPI Rank 1: (GPU): creating curand object with seed 34 @@ -2202,7 +2202,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:32: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0633s; samplesPerSecond = 40438.5 MPI Rank 1: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0615s; samplesPerSecond = 41657.2 MPI Rank 1: 08/16/2016 09:58:32: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0618s; samplesPerSecond = 41426.6 diff --git a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt index 2c185c850..41564d901 100644 --- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt @@ -545,7 +545,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:27: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:28: Epoch[ 2 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.94527147 * 2560; err = 0.88906250 * 2560; time = 0.9421s; samplesPerSecond = 2717.4 MPI Rank 0: 08/16/2016 03:02:29: Epoch[ 2 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.86277831 * 2560; err = 0.87773437 * 2560; time = 0.9084s; samplesPerSecond = 2818.2 MPI Rank 0: 08/16/2016 03:02:30: Epoch[ 2 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.85494012 * 2560; err = 0.89140625 * 2560; time = 0.9825s; samplesPerSecond = 2605.5 @@ -561,7 +561,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:35: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:36: Epoch[ 3 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.34316119 * 2560; err = 0.79257813 * 2560; time = 1.1723s; samplesPerSecond = 2183.7 MPI Rank 0: 08/16/2016 03:02:37: Epoch[ 3 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.24841775 * 2560; err = 0.79257813 * 2560; time = 0.9683s; samplesPerSecond = 2643.8 MPI Rank 0: 08/16/2016 03:02:38: Epoch[ 3 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.25792707 * 2560; err = 0.78632813 * 2560; time = 0.8789s; samplesPerSecond = 2912.6 @@ -576,7 +576,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:42: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:43: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.1723s; samplesPerSecond = 2183.8 MPI Rank 0: 08/16/2016 03:02:44: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0254s; samplesPerSecond = 2496.6 MPI Rank 0: 08/16/2016 03:02:45: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.6891s; samplesPerSecond = 3714.7 @@ -592,7 +592,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:50: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:51: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.8140s; samplesPerSecond = 3144.9 MPI Rank 0: 08/16/2016 03:02:52: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.7814s; samplesPerSecond = 3276.0 MPI Rank 0: 08/16/2016 03:02:53: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0202s; samplesPerSecond = 2509.4 @@ -1095,7 +1095,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:27: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:28: Epoch[ 2 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.94527147 * 2560; err = 0.88906250 * 2560; time = 0.9409s; samplesPerSecond = 2720.7 MPI Rank 1: 08/16/2016 03:02:29: Epoch[ 2 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.86277831 * 2560; err = 0.87773437 * 2560; time = 0.9084s; samplesPerSecond = 2818.2 MPI Rank 1: 08/16/2016 03:02:30: Epoch[ 2 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.85494012 * 2560; err = 0.89140625 * 2560; time = 0.9825s; samplesPerSecond = 2605.7 @@ -1110,7 +1110,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:35: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:36: Epoch[ 3 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 3.34316119 * 2560; err = 0.79257813 * 2560; time = 1.1725s; samplesPerSecond = 2183.3 MPI Rank 1: 08/16/2016 03:02:37: Epoch[ 3 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 3.24841775 * 2560; err = 0.79257813 * 2560; time = 0.9674s; samplesPerSecond = 2646.2 MPI Rank 1: 08/16/2016 03:02:38: Epoch[ 3 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 3.25792707 * 2560; err = 0.78632813 * 2560; time = 0.8798s; samplesPerSecond = 2909.7 @@ -1124,7 +1124,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:42: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:43: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.1737s; samplesPerSecond = 2181.1 MPI Rank 1: 08/16/2016 03:02:44: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0242s; samplesPerSecond = 2499.4 MPI Rank 1: 08/16/2016 03:02:45: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.6909s; samplesPerSecond = 3705.2 @@ -1139,7 +1139,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:50: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:51: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.8153s; samplesPerSecond = 3140.0 MPI Rank 1: 08/16/2016 03:02:52: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.7794s; samplesPerSecond = 3284.6 MPI Rank 1: 08/16/2016 03:02:53: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0223s; samplesPerSecond = 2504.2 @@ -1657,7 +1657,7 @@ MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 4: learning rate per sample = 0. MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:02: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 0.9824s; samplesPerSecond = 2605.9 MPI Rank 0: 08/16/2016 03:03:03: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0388s; samplesPerSecond = 2464.3 MPI Rank 0: 08/16/2016 03:03:04: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.9608s; samplesPerSecond = 2664.4 @@ -1673,7 +1673,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:09: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:10: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.9136s; samplesPerSecond = 2802.2 MPI Rank 0: 08/16/2016 03:03:11: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.9281s; samplesPerSecond = 2758.3 MPI Rank 0: 08/16/2016 03:03:12: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0057s; samplesPerSecond = 2545.5 @@ -2135,7 +2135,7 @@ MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 4: learning rate per sample = 0. MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:02: Epoch[ 4 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.0301s; samplesPerSecond = 2485.2 MPI Rank 1: 08/16/2016 03:03:03: Epoch[ 4 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0388s; samplesPerSecond = 2464.3 MPI Rank 1: 08/16/2016 03:03:04: Epoch[ 4 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.9608s; samplesPerSecond = 2664.5 @@ -2150,7 +2150,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:09: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:10: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.9148s; samplesPerSecond = 2798.3 MPI Rank 1: 08/16/2016 03:03:11: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.9282s; samplesPerSecond = 2757.9 MPI Rank 1: 08/16/2016 03:03:12: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0036s; samplesPerSecond = 2550.8 diff --git a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt index 714a8164b..35c53c0eb 100644 --- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt @@ -549,7 +549,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:24: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 4 MPI Rank 0: (GPU): creating curand object with seed 5 MPI Rank 0: (GPU): creating curand object with seed 6 @@ -569,7 +569,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:25: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 8 MPI Rank 0: (GPU): creating curand object with seed 9 MPI Rank 0: (GPU): creating curand object with seed 10 @@ -588,7 +588,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:26: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 12 MPI Rank 0: (GPU): creating curand object with seed 13 MPI Rank 0: (GPU): creating curand object with seed 14 @@ -608,7 +608,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:27: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1077s; samplesPerSecond = 23779.7 MPI Rank 0: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1058s; samplesPerSecond = 24206.9 MPI Rank 0: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1088s; samplesPerSecond = 23534.4 @@ -1115,7 +1115,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:24: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 24 MPI Rank 1: (GPU): creating curand object with seed 25 MPI Rank 1: (GPU): creating curand object with seed 26 @@ -1134,7 +1134,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:25: Starting Epoch 3: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 28 MPI Rank 1: (GPU): creating curand object with seed 29 MPI Rank 1: (GPU): creating curand object with seed 30 @@ -1152,7 +1152,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:26: Starting Epoch 4: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 32 MPI Rank 1: (GPU): creating curand object with seed 33 MPI Rank 1: (GPU): creating curand object with seed 34 @@ -1171,7 +1171,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:27: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1071s; samplesPerSecond = 23903.1 MPI Rank 1: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1057s; samplesPerSecond = 24212.4 MPI Rank 1: 08/16/2016 03:03:27: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1088s; samplesPerSecond = 23531.1 @@ -1689,7 +1689,7 @@ MPI Rank 0: 08/16/2016 03:03:33: Starting Epoch 4: learning rate per sample = 0. MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: (GPU): creating curand object with seed 12 MPI Rank 0: (GPU): creating curand object with seed 13 MPI Rank 0: (GPU): creating curand object with seed 14 @@ -1709,7 +1709,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:34: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1067s; samplesPerSecond = 23987.6 MPI Rank 0: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1040s; samplesPerSecond = 24610.7 MPI Rank 0: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1042s; samplesPerSecond = 24574.7 @@ -2171,7 +2171,7 @@ MPI Rank 1: 08/16/2016 03:03:33: Starting Epoch 4: learning rate per sample = 0. MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: (GPU): creating curand object with seed 32 MPI Rank 1: (GPU): creating curand object with seed 33 MPI Rank 1: (GPU): creating curand object with seed 34 @@ -2190,7 +2190,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:34: Starting Epoch 5: learning rate per sample = 0.001953 effective momentum = 0.900000 momentum as time constant = 2429.8 samples MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 1- 10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1072s; samplesPerSecond = 23882.2 MPI Rank 1: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 11- 20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1041s; samplesPerSecond = 24588.9 MPI Rank 1: 08/16/2016 03:03:34: Epoch[ 5 of 5]-Minibatch[ 21- 30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1041s; samplesPerSecond = 24588.0 diff --git a/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml b/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml index 6bc0b2949..63c76382c 100644 --- a/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml @@ -34,7 +34,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 2 - - NumGradientBits = 32 + - myRank = {{integer}} + - numNodes = 2 + - numGradientBits = 32 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt index 0cf688680..59b99329e 100644 --- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt @@ -616,7 +616,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5690s; samplesPerSecond = 4499.5 MPI Rank 0: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7981s; samplesPerSecond = 3207.4 MPI Rank 0: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4440s; samplesPerSecond = 5765.9 @@ -631,7 +631,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3527s; samplesPerSecond = 7570.1 MPI Rank 0: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6866s; samplesPerSecond = 6071.4 MPI Rank 0: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.0565s @@ -1169,7 +1169,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5643s; samplesPerSecond = 4537.0 MPI Rank 1: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7961s; samplesPerSecond = 3215.7 MPI Rank 1: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4504s; samplesPerSecond = 5684.3 @@ -1183,7 +1183,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3515s; samplesPerSecond = 7576.5 MPI Rank 1: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6863s; samplesPerSecond = 6072.6 MPI Rank 1: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05591s @@ -1720,7 +1720,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5710s; samplesPerSecond = 4483.5 MPI Rank 2: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7894s; samplesPerSecond = 3242.9 MPI Rank 2: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4520s; samplesPerSecond = 5664.3 @@ -1734,7 +1734,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3532s; samplesPerSecond = 7567.4 MPI Rank 2: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6847s; samplesPerSecond = 6078.4 MPI Rank 2: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05602s diff --git a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt index e9830d2d0..28ae547c6 100644 --- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt @@ -617,7 +617,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1484s; samplesPerSecond = 17245.9 MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17868.6 MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18039.2 @@ -632,7 +632,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2875s; samplesPerSecond = 35614.4 MPI Rank 0: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36224.7 MPI Rank 0: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.577049s @@ -1171,7 +1171,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1487s; samplesPerSecond = 17211.0 MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17870.0 MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18038.8 @@ -1185,7 +1185,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2878s; samplesPerSecond = 35576.6 MPI Rank 1: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36218.6 MPI Rank 1: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.576897s @@ -1723,7 +1723,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1485s; samplesPerSecond = 17237.9 MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1432s; samplesPerSecond = 17878.8 MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18040.0 @@ -1737,7 +1737,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2880s; samplesPerSecond = 35554.3 MPI Rank 2: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2824s; samplesPerSecond = 36264.2 MPI Rank 2: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.57713s diff --git a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt index 91f02ba14..9b2f8ac04 100644 --- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt @@ -614,7 +614,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3458s; samplesPerSecond = 7402.6 MPI Rank 0: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3452s; samplesPerSecond = 7416.2 MPI Rank 0: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.5 @@ -629,7 +629,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.6987s; samplesPerSecond = 14654.8 MPI Rank 0: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6563s; samplesPerSecond = 15601.8 MPI Rank 0: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38257s @@ -1168,7 +1168,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3464s; samplesPerSecond = 7390.3 MPI Rank 1: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.0 MPI Rank 1: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3528s; samplesPerSecond = 7255.6 @@ -1182,7 +1182,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7010s; samplesPerSecond = 14607.7 MPI Rank 1: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6562s; samplesPerSecond = 15604.6 MPI Rank 1: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38447s @@ -1720,7 +1720,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3462s; samplesPerSecond = 7394.9 MPI Rank 2: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.9 MPI Rank 2: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.6 @@ -1734,7 +1734,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7014s; samplesPerSecond = 14599.6 MPI Rank 2: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6564s; samplesPerSecond = 15599.2 MPI Rank 2: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38407s diff --git a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt index 7dc5b9edb..c4a3616a0 100644 --- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt @@ -615,7 +615,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2084s; samplesPerSecond = 12286.1 MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1804s; samplesPerSecond = 14191.5 MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1704s; samplesPerSecond = 15022.6 @@ -630,7 +630,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3564s; samplesPerSecond = 28732.2 MPI Rank 0: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31728.4 MPI Rank 0: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689726s @@ -1170,7 +1170,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2063s; samplesPerSecond = 12411.0 MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1808s; samplesPerSecond = 14158.0 MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15049.2 @@ -1184,7 +1184,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3534s; samplesPerSecond = 28972.3 MPI Rank 1: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31731.1 MPI Rank 1: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.690282s @@ -1723,7 +1723,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2089s; samplesPerSecond = 12254.0 MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1802s; samplesPerSecond = 14210.1 MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15046.4 @@ -1737,7 +1737,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3569s; samplesPerSecond = 28689.5 MPI Rank 2: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3228s; samplesPerSecond = 31727.1 MPI Rank 2: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689913s diff --git a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml index 39f159ca9..7a3defe98 100644 --- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml @@ -34,7 +34,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 1 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 1 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt index 0594c5a44..0adf7c7d8 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -622,7 +622,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.080039 MPI Rank 0: Async gradient aggregation wait time: 9e-06 MPI Rank 0: Actual gradient aggregation time: 0.025201 @@ -670,7 +670,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.011011 MPI Rank 0: Actual gradient aggregation time: 0.088497 MPI Rank 0: Async gradient aggregation wait time: 0.026596 @@ -687,7 +687,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 MPI Rank 0: Actual gradient aggregation time: 0.023009 MPI Rank 0: Async gradient aggregation wait time: 1e-05 @@ -1240,7 +1240,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.092054 MPI Rank 1: Async gradient aggregation wait time: 0.029108 MPI Rank 1: Actual gradient aggregation time: 0.053094 @@ -1287,7 +1287,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.009871 MPI Rank 1: Actual gradient aggregation time: 0.084551 MPI Rank 1: Async gradient aggregation wait time: 0.067075 @@ -1303,7 +1303,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.162303 MPI Rank 1: Actual gradient aggregation time: 0.088365 MPI Rank 1: Async gradient aggregation wait time: 0.357011 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.124401 MPI Rank 2: Async gradient aggregation wait time: 0.027767 MPI Rank 2: Actual gradient aggregation time: 0.053848 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 MPI Rank 2: Actual gradient aggregation time: 0.034828 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.144867 MPI Rank 2: Actual gradient aggregation time: 0.087324 MPI Rank 2: Async gradient aggregation wait time: 0.337574 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt index 0a6e05c57..07ddc0a67 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.006881 MPI Rank 0: Async gradient aggregation wait time: 0.001169 MPI Rank 0: Actual gradient aggregation time: 0.012812 @@ -671,7 +671,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003256 MPI Rank 0: Actual gradient aggregation time: 0.026681 MPI Rank 0: Async gradient aggregation wait time: 0.001712 @@ -688,7 +688,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.001962 MPI Rank 0: Actual gradient aggregation time: 0.02659 MPI Rank 0: Async gradient aggregation wait time: 0.003671 @@ -1242,7 +1242,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.017293 MPI Rank 1: Async gradient aggregation wait time: 0.001855 MPI Rank 1: Actual gradient aggregation time: 0.011879 @@ -1289,7 +1289,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.007435 MPI Rank 1: Actual gradient aggregation time: 0.028784 MPI Rank 1: Async gradient aggregation wait time: 0.006185 @@ -1305,7 +1305,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.001906 MPI Rank 1: Actual gradient aggregation time: 0.027016 MPI Rank 1: Async gradient aggregation wait time: 0.003939 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.014665 MPI Rank 2: Async gradient aggregation wait time: 0.001294 MPI Rank 2: Actual gradient aggregation time: 0.011743 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 3e-06 MPI Rank 2: Actual gradient aggregation time: 0.022531 MPI Rank 2: Async gradient aggregation wait time: 0.011564 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.015928 MPI Rank 2: Actual gradient aggregation time: 0.027468 MPI Rank 2: Async gradient aggregation wait time: 0.001119 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index 3fb4d83a9..559a9e0f8 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -620,7 +620,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.017461 MPI Rank 0: Async gradient aggregation wait time: 0.004531 MPI Rank 0: Actual gradient aggregation time: 0.021009 @@ -668,7 +668,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 3e-006 MPI Rank 0: Actual gradient aggregation time: 0.020512 MPI Rank 0: Async gradient aggregation wait time: 3e-006 @@ -685,7 +685,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 5e-006 MPI Rank 0: Actual gradient aggregation time: 0.018185 MPI Rank 0: Async gradient aggregation wait time: 4e-006 @@ -1239,7 +1239,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.029656 MPI Rank 1: Async gradient aggregation wait time: 0.007273 MPI Rank 1: Actual gradient aggregation time: 0.021183 @@ -1286,7 +1286,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 2e-006 MPI Rank 1: Actual gradient aggregation time: 0.039428 MPI Rank 1: Async gradient aggregation wait time: 7e-006 @@ -1302,7 +1302,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 4e-006 MPI Rank 1: Actual gradient aggregation time: 0.032424 MPI Rank 1: Async gradient aggregation wait time: 0.002787 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.032204 MPI Rank 2: Async gradient aggregation wait time: 0.010081 MPI Rank 2: Actual gradient aggregation time: 0.021164 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.019786 MPI Rank 2: Actual gradient aggregation time: 0.040852 MPI Rank 2: Async gradient aggregation wait time: 0.024007 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.011905 MPI Rank 2: Actual gradient aggregation time: 0.051704 MPI Rank 2: Async gradient aggregation wait time: 0.015128 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index 9e7826db3..a66d30a6c 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -621,7 +621,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.021385 MPI Rank 0: Async gradient aggregation wait time: 0.006373 MPI Rank 0: Actual gradient aggregation time: 0.017647 @@ -669,7 +669,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.014735 MPI Rank 0: Actual gradient aggregation time: 0.03433 MPI Rank 0: Async gradient aggregation wait time: 0.004733 @@ -686,7 +686,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.004776 MPI Rank 0: Actual gradient aggregation time: 0.028351 MPI Rank 0: Async gradient aggregation wait time: 0.008151 @@ -1241,7 +1241,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.016814 MPI Rank 1: Async gradient aggregation wait time: 0.004995 MPI Rank 1: Actual gradient aggregation time: 0.018553 @@ -1288,7 +1288,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.010824 MPI Rank 1: Actual gradient aggregation time: 0.034649 MPI Rank 1: Async gradient aggregation wait time: 0.018618 @@ -1304,7 +1304,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.006331 MPI Rank 1: Actual gradient aggregation time: 0.028676 MPI Rank 1: Async gradient aggregation wait time: 0.007827 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.035327 MPI Rank 2: Async gradient aggregation wait time: 0.00284 MPI Rank 2: Actual gradient aggregation time: 0.018497 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 1e-006 MPI Rank 2: Actual gradient aggregation time: 0.016322 MPI Rank 2: Async gradient aggregation wait time: 0.013477 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.024966 MPI Rank 2: Actual gradient aggregation time: 0.028835 MPI Rank 2: Async gradient aggregation wait time: 0.002866 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml index d018c1a5c..fe6f86d49 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml @@ -34,8 +34,8 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 1 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 1 - distributed reading is ENABLED - BufferedAsyncGradientAggregation is ENABLED diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt index 564c62e00..07470a905 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt @@ -611,7 +611,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:57:53: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:57:53: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.56962759 * 640; EvalClassificationError = 0.91093750 * 640; time = 0.2812s; samplesPerSecond = 2275.8 MPI Rank 0: 08/16/2016 09:57:54: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.33203458 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.2752s; samplesPerSecond = 2325.6 MPI Rank 0: 08/16/2016 09:57:54: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97802531 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.2728s; samplesPerSecond = 2346.5 @@ -654,7 +654,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:58:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12679700 * 2560; EvalClassificationError = 0.56601563 * 2560; time = 0.8259s; samplesPerSecond = 3099.5 MPI Rank 0: 08/16/2016 09:58:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.04568504 * 2560; EvalClassificationError = 0.55429688 * 2560; time = 0.8085s; samplesPerSecond = 3166.4 MPI Rank 0: 08/16/2016 09:58:33: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02935394 * 2560; EvalClassificationError = 0.54570312 * 2560; time = 0.7974s; samplesPerSecond = 3210.4 @@ -672,7 +672,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:05: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:08: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93108721 * 10240; EvalClassificationError = 0.52958984 * 10240; time = 2.8844s; samplesPerSecond = 3550.1 MPI Rank 0: 08/16/2016 09:59:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95098710 * 10240; EvalClassificationError = 0.54755859 * 10240; time = 2.8076s; samplesPerSecond = 3647.2 MPI Rank 0: 08/16/2016 09:59:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94103716 * 20480; EvalClassificationError = 0.53857422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=5.70009s @@ -1237,7 +1237,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:57:53: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:57:53: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.56962759 * 640; EvalClassificationError = 0.91093750 * 640; time = 0.2811s; samplesPerSecond = 2276.9 MPI Rank 1: 08/16/2016 09:57:54: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.33203458 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.2753s; samplesPerSecond = 2325.2 MPI Rank 1: 08/16/2016 09:57:54: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97802531 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.2721s; samplesPerSecond = 2352.1 @@ -1278,7 +1278,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:58:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.12679700 * 2560; EvalClassificationError = 0.56601563 * 2560; time = 0.8264s; samplesPerSecond = 3097.6 MPI Rank 1: 08/16/2016 09:58:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.04568504 * 2560; EvalClassificationError = 0.55429688 * 2560; time = 0.8080s; samplesPerSecond = 3168.2 MPI Rank 1: 08/16/2016 09:58:33: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.02935394 * 2560; EvalClassificationError = 0.54570312 * 2560; time = 0.7977s; samplesPerSecond = 3209.3 @@ -1295,7 +1295,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:05: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:08: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.93108721 * 10240; EvalClassificationError = 0.52958984 * 10240; time = 2.8859s; samplesPerSecond = 3548.3 MPI Rank 1: 08/16/2016 09:59:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95098710 * 10240; EvalClassificationError = 0.54755859 * 10240; time = 2.8075s; samplesPerSecond = 3647.4 MPI Rank 1: 08/16/2016 09:59:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94103716 * 20480; EvalClassificationError = 0.53857422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=5.7s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt index c42599166..842523b0b 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt @@ -612,7 +612,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:43: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:43: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1080s; samplesPerSecond = 5925.3 MPI Rank 0: 08/16/2016 09:59:43: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1000s; samplesPerSecond = 6397.8 MPI Rank 0: 08/16/2016 09:59:44: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1013s; samplesPerSecond = 6319.2 @@ -655,7 +655,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:49: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1288s; samplesPerSecond = 19871.6 MPI Rank 0: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664062 * 2560; time = 0.1249s; samplesPerSecond = 20497.2 MPI Rank 0: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1248s; samplesPerSecond = 20516.8 @@ -673,7 +673,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.2783s; samplesPerSecond = 36788.6 MPI Rank 0: 08/16/2016 09:59:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.2723s; samplesPerSecond = 37610.7 MPI Rank 0: 08/16/2016 09:59:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.560004s @@ -1239,7 +1239,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:43: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:43: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1079s; samplesPerSecond = 5931.0 MPI Rank 1: 08/16/2016 09:59:43: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1000s; samplesPerSecond = 6401.4 MPI Rank 1: 08/16/2016 09:59:44: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1012s; samplesPerSecond = 6325.5 @@ -1280,7 +1280,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:49: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1286s; samplesPerSecond = 19904.8 MPI Rank 1: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664062 * 2560; time = 0.1249s; samplesPerSecond = 20504.1 MPI Rank 1: 08/16/2016 09:59:49: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1247s; samplesPerSecond = 20526.1 @@ -1297,7 +1297,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.2793s; samplesPerSecond = 36660.2 MPI Rank 1: 08/16/2016 09:59:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.2722s; samplesPerSecond = 37620.1 MPI Rank 1: 08/16/2016 09:59:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.559871s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt index 405e2678b..361aa9ef2 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt @@ -610,7 +610,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:03: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731190 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.1102s; samplesPerSecond = 5809.0 MPI Rank 0: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208878 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.1297s; samplesPerSecond = 4932.6 MPI Rank 0: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319840 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.1064s; samplesPerSecond = 6015.2 @@ -653,7 +653,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:11: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09963072 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2349s; samplesPerSecond = 10898.6 MPI Rank 0: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412622 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.2179s; samplesPerSecond = 11749.9 MPI Rank 0: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477550 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.2211s; samplesPerSecond = 11580.5 @@ -671,7 +671,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:16: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:17: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951347 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.7135s; samplesPerSecond = 14352.0 MPI Rank 0: 08/16/2016 03:02:17: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082770 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.6161s; samplesPerSecond = 16621.4 MPI Rank 0: 08/16/2016 03:02:17: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92017059 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.3412s @@ -1237,7 +1237,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:03: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731190 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.1103s; samplesPerSecond = 5804.6 MPI Rank 1: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208878 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.1297s; samplesPerSecond = 4935.8 MPI Rank 1: 08/16/2016 03:02:03: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319840 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.1064s; samplesPerSecond = 6015.3 @@ -1278,7 +1278,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:11: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09963072 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2348s; samplesPerSecond = 10902.3 MPI Rank 1: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412622 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.2178s; samplesPerSecond = 11753.0 MPI Rank 1: 08/16/2016 03:02:11: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477550 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.2209s; samplesPerSecond = 11587.9 @@ -1295,7 +1295,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:16: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:17: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951347 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.7153s; samplesPerSecond = 14315.2 MPI Rank 1: 08/16/2016 03:02:17: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082770 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.6162s; samplesPerSecond = 16618.6 MPI Rank 1: 08/16/2016 03:02:17: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92017059 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.34119s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt index 2021d0206..9e00d7a4d 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt @@ -611,7 +611,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:36: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1600s; samplesPerSecond = 3999.8 MPI Rank 0: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1186s; samplesPerSecond = 5394.3 MPI Rank 0: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1100s; samplesPerSecond = 5815.7 @@ -654,7 +654,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:43: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1674s; samplesPerSecond = 15289.7 MPI Rank 0: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.1479s; samplesPerSecond = 17309.5 MPI Rank 0: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1413s; samplesPerSecond = 18120.8 @@ -672,7 +672,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:46: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.3479s; samplesPerSecond = 29432.6 MPI Rank 0: 08/16/2016 03:02:46: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.3252s; samplesPerSecond = 31484.1 MPI Rank 0: 08/16/2016 03:02:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.684367s @@ -1239,7 +1239,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:36: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1549s; samplesPerSecond = 4132.8 MPI Rank 1: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1179s; samplesPerSecond = 5427.5 MPI Rank 1: 08/16/2016 03:02:36: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1100s; samplesPerSecond = 5817.1 @@ -1280,7 +1280,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:43: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1664s; samplesPerSecond = 15385.0 MPI Rank 1: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.1471s; samplesPerSecond = 17399.5 MPI Rank 1: 08/16/2016 03:02:43: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1413s; samplesPerSecond = 18123.8 @@ -1297,7 +1297,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:45: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:46: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.3456s; samplesPerSecond = 29626.5 MPI Rank 1: 08/16/2016 03:02:46: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.3253s; samplesPerSecond = 31482.2 MPI Rank 1: 08/16/2016 03:02:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.684319s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml index 8e6f45f28..e778e08d0 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml @@ -43,7 +43,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 2 - - NumGradientBits = 64 + - myRank = {{integer}} + - numNodes = 2 + - numGradientBits = 64 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt index 9ded86dae..e30bc3418 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt @@ -571,7 +571,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2660s; samplesPerSecond = 2405.9 MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1966s; samplesPerSecond = 3255.6 MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4635s; samplesPerSecond = 1380.7 @@ -610,7 +610,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5967s; samplesPerSecond = 4290.2 MPI Rank 0: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3821s; samplesPerSecond = 6699.7 MPI Rank 0: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6943s; samplesPerSecond = 3686.9 @@ -625,7 +625,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9884s; samplesPerSecond = 10360.6 MPI Rank 0: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.7 MPI Rank 0: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39916s @@ -1119,7 +1119,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2613s; samplesPerSecond = 2449.3 MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.6 MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4674s; samplesPerSecond = 1369.3 @@ -1157,7 +1157,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5922s; samplesPerSecond = 4323.0 MPI Rank 1: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3859s; samplesPerSecond = 6634.3 MPI Rank 1: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6904s; samplesPerSecond = 3707.8 @@ -1171,7 +1171,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9880s; samplesPerSecond = 10364.1 MPI Rank 1: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.5 MPI Rank 1: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39895s @@ -1664,7 +1664,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2618s; samplesPerSecond = 2444.4 MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.9 MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4698s; samplesPerSecond = 1362.4 @@ -1702,7 +1702,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5955s; samplesPerSecond = 4298.6 MPI Rank 2: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3830s; samplesPerSecond = 6684.6 MPI Rank 2: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6922s; samplesPerSecond = 3698.2 @@ -1716,7 +1716,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9897s; samplesPerSecond = 10346.1 MPI Rank 2: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4041s; samplesPerSecond = 7292.7 MPI Rank 2: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39757s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt index 9fe00d658..c3eb64dd3 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt @@ -572,7 +572,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0693s; samplesPerSecond = 9238.4 MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0567s; samplesPerSecond = 11281.5 MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0566s; samplesPerSecond = 11312.8 @@ -611,7 +611,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41819.8 MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0599s; samplesPerSecond = 42761.5 MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42762.2 @@ -626,7 +626,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0894s; samplesPerSecond = 114526.0 MPI Rank 0: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122653.8 MPI Rank 0: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179159s @@ -1121,7 +1121,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0695s; samplesPerSecond = 9204.8 MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0563s; samplesPerSecond = 11359.2 MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0567s; samplesPerSecond = 11286.5 @@ -1159,7 +1159,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0614s; samplesPerSecond = 41699.9 MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42846.6 MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42717.2 @@ -1173,7 +1173,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0900s; samplesPerSecond = 113825.8 MPI Rank 1: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0832s; samplesPerSecond = 123133.2 MPI Rank 1: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179295s @@ -1667,7 +1667,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0697s; samplesPerSecond = 9180.2 MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0565s; samplesPerSecond = 11323.4 MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0568s; samplesPerSecond = 11270.0 @@ -1705,7 +1705,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41826.0 MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42906.2 MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0601s; samplesPerSecond = 42627.6 @@ -1719,7 +1719,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0899s; samplesPerSecond = 113873.9 MPI Rank 2: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122592.2 MPI Rank 2: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179288s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt index fd8b1591a..05ee13326 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt @@ -569,7 +569,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7677.6 MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0883s; samplesPerSecond = 7250.5 MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0900s; samplesPerSecond = 7107.4 @@ -608,7 +608,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1410s; samplesPerSecond = 18159.2 MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1391s; samplesPerSecond = 18410.2 MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19084.0 @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3803s; samplesPerSecond = 26929.2 MPI Rank 0: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28721.0 MPI Rank 0: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746914s @@ -1118,7 +1118,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7671.1 MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7260.9 MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7105.9 @@ -1156,7 +1156,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1412s; samplesPerSecond = 18128.6 MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18430.0 MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1342s; samplesPerSecond = 19081.7 @@ -1170,7 +1170,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3812s; samplesPerSecond = 26864.6 MPI Rank 1: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3564s; samplesPerSecond = 28728.9 MPI Rank 1: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746915s @@ -1664,7 +1664,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0829s; samplesPerSecond = 7722.7 MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7266.9 MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7102.3 @@ -1702,7 +1702,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1413s; samplesPerSecond = 18119.0 MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18436.9 MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19089.9 @@ -1716,7 +1716,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3813s; samplesPerSecond = 26856.3 MPI Rank 2: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28725.1 MPI Rank 2: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746861s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt index e5b7443d1..f65e7e04a 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt @@ -570,7 +570,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1000s; samplesPerSecond = 6400.3 MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0738s; samplesPerSecond = 8674.2 MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8396.9 @@ -609,7 +609,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1045s; samplesPerSecond = 24493.4 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0862s; samplesPerSecond = 29707.7 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0768s; samplesPerSecond = 33337.7 @@ -624,7 +624,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1518s; samplesPerSecond = 67460.3 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1251s; samplesPerSecond = 81832.3 MPI Rank 0: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287003s @@ -1120,7 +1120,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0996s; samplesPerSecond = 6427.7 MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0742s; samplesPerSecond = 8629.6 MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8393.6 @@ -1158,7 +1158,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1047s; samplesPerSecond = 24439.6 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0858s; samplesPerSecond = 29832.0 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0767s; samplesPerSecond = 33361.1 @@ -1172,7 +1172,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1520s; samplesPerSecond = 67362.2 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81779.3 MPI Rank 1: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287153s @@ -1667,7 +1667,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0986s; samplesPerSecond = 6490.4 MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0740s; samplesPerSecond = 8642.9 MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0763s; samplesPerSecond = 8383.7 @@ -1705,7 +1705,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1034s; samplesPerSecond = 24758.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0861s; samplesPerSecond = 29745.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0769s; samplesPerSecond = 33306.0 @@ -1719,7 +1719,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1506s; samplesPerSecond = 67980.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81781.3 MPI Rank 2: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.286844s diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml index c11531d61..d830f7ee2 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml @@ -34,7 +34,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 32 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 32 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt index 9f8871d37..1225d8f56 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -622,7 +622,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.008473 MPI Rank 0: Async gradient aggregation wait time: 0.00554 MPI Rank 0: Actual gradient aggregation time: 0.020395 @@ -670,7 +670,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 MPI Rank 0: Actual gradient aggregation time: 0.009207 MPI Rank 0: Async gradient aggregation wait time: 9e-06 @@ -687,7 +687,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.0046 MPI Rank 0: Actual gradient aggregation time: 0.069203 MPI Rank 0: Async gradient aggregation wait time: 0.041271 @@ -1240,7 +1240,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.063039 MPI Rank 1: Async gradient aggregation wait time: 0.022678 MPI Rank 1: Actual gradient aggregation time: 0.025025 @@ -1287,7 +1287,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.142203 MPI Rank 1: Actual gradient aggregation time: 0.157984 MPI Rank 1: Async gradient aggregation wait time: 0.179014 @@ -1303,7 +1303,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.04561 MPI Rank 1: Actual gradient aggregation time: 0.077514 MPI Rank 1: Async gradient aggregation wait time: 0.069416 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.041047 MPI Rank 2: Async gradient aggregation wait time: 8e-06 MPI Rank 2: Actual gradient aggregation time: 0.004986 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 9e-06 MPI Rank 2: Actual gradient aggregation time: 0.149714 MPI Rank 2: Async gradient aggregation wait time: 0.120817 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 9e-06 MPI Rank 2: Actual gradient aggregation time: 0.002705 MPI Rank 2: Async gradient aggregation wait time: 9e-06 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt index 9c197c4b6..73ffe0d87 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.01227 MPI Rank 0: Async gradient aggregation wait time: 0.00776 MPI Rank 0: Actual gradient aggregation time: 0.011351 @@ -671,7 +671,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.000306 MPI Rank 0: Actual gradient aggregation time: 0.017813 MPI Rank 0: Async gradient aggregation wait time: 0.003066 @@ -688,7 +688,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003211 MPI Rank 0: Actual gradient aggregation time: 0.026824 MPI Rank 0: Async gradient aggregation wait time: 0.002719 @@ -1242,7 +1242,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.0237 MPI Rank 1: Async gradient aggregation wait time: 0.00528 MPI Rank 1: Actual gradient aggregation time: 0.011811 @@ -1289,7 +1289,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.013339 MPI Rank 1: Actual gradient aggregation time: 0.032552 MPI Rank 1: Async gradient aggregation wait time: 0.007984 @@ -1305,7 +1305,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.002841 MPI Rank 1: Actual gradient aggregation time: 0.027316 MPI Rank 1: Async gradient aggregation wait time: 0.002301 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.028506 MPI Rank 2: Async gradient aggregation wait time: 0.004749 MPI Rank 2: Actual gradient aggregation time: 0.011317 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.003625 MPI Rank 2: Actual gradient aggregation time: 0.032563 MPI Rank 2: Async gradient aggregation wait time: 0.012159 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.016234 MPI Rank 2: Actual gradient aggregation time: 0.026881 MPI Rank 2: Async gradient aggregation wait time: 0.005475 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index 96d004c10..7b5f621ba 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -617,7 +617,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.010875 MPI Rank 0: Async gradient aggregation wait time: 0.046041 MPI Rank 0: Actual gradient aggregation time: 0.070247 @@ -665,7 +665,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 2e-006 MPI Rank 0: Actual gradient aggregation time: 0.106492 MPI Rank 0: Async gradient aggregation wait time: 2e-006 @@ -682,7 +682,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 3e-006 MPI Rank 0: Actual gradient aggregation time: 0.010023 MPI Rank 0: Async gradient aggregation wait time: 3e-006 @@ -1233,7 +1233,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.033393 MPI Rank 1: Async gradient aggregation wait time: 0.005092 MPI Rank 1: Actual gradient aggregation time: 0.070288 @@ -1280,7 +1280,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.056538 MPI Rank 1: Actual gradient aggregation time: 0.181181 MPI Rank 1: Async gradient aggregation wait time: 3e-006 @@ -1296,7 +1296,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 2e-006 MPI Rank 1: Actual gradient aggregation time: 0.028628 MPI Rank 1: Async gradient aggregation wait time: 0.062352 @@ -1846,7 +1846,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.040872 MPI Rank 2: Async gradient aggregation wait time: 0.04797 MPI Rank 2: Actual gradient aggregation time: 0.070448 @@ -1893,7 +1893,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.103418 MPI Rank 2: Actual gradient aggregation time: 0.168332 MPI Rank 2: Async gradient aggregation wait time: 0.014615 @@ -1909,7 +1909,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 3e-006 MPI Rank 2: Actual gradient aggregation time: 0.092817 MPI Rank 2: Async gradient aggregation wait time: 0.095403 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index 05ca68e72..9382da138 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -618,7 +618,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.01782 MPI Rank 0: Async gradient aggregation wait time: 0.005297 MPI Rank 0: Actual gradient aggregation time: 0.025182 @@ -666,7 +666,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003725 MPI Rank 0: Actual gradient aggregation time: 0.069103 MPI Rank 0: Async gradient aggregation wait time: 0.001861 @@ -683,7 +683,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.033541 MPI Rank 0: Actual gradient aggregation time: 0.07365 MPI Rank 0: Async gradient aggregation wait time: 0.011228 @@ -1235,7 +1235,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.020729 MPI Rank 1: Async gradient aggregation wait time: 0.009212 MPI Rank 1: Actual gradient aggregation time: 0.025214 @@ -1282,7 +1282,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.061455 MPI Rank 1: Actual gradient aggregation time: 0.070776 MPI Rank 1: Async gradient aggregation wait time: 0.04993 @@ -1298,7 +1298,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.006886 MPI Rank 1: Actual gradient aggregation time: 0.071953 MPI Rank 1: Async gradient aggregation wait time: 0.012085 @@ -1849,7 +1849,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.03301 MPI Rank 2: Async gradient aggregation wait time: 0.004502 MPI Rank 2: Actual gradient aggregation time: 0.025447 @@ -1896,7 +1896,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.001929 MPI Rank 2: Actual gradient aggregation time: 0.069767 MPI Rank 2: Async gradient aggregation wait time: 0.051731 @@ -1912,7 +1912,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.005577 MPI Rank 2: Actual gradient aggregation time: 0.072623 MPI Rank 2: Async gradient aggregation wait time: 0.000919 diff --git a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml index 9c8a5bf14..bd60ea829 100644 --- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml +++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml @@ -34,8 +34,8 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 64 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 64 - distributed reading is ENABLED - BufferedAsyncGradientAggregation is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt index 0cf688680..59b99329e 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt @@ -616,7 +616,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5690s; samplesPerSecond = 4499.5 MPI Rank 0: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7981s; samplesPerSecond = 3207.4 MPI Rank 0: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4440s; samplesPerSecond = 5765.9 @@ -631,7 +631,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3527s; samplesPerSecond = 7570.1 MPI Rank 0: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6866s; samplesPerSecond = 6071.4 MPI Rank 0: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.0565s @@ -1169,7 +1169,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5643s; samplesPerSecond = 4537.0 MPI Rank 1: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7961s; samplesPerSecond = 3215.7 MPI Rank 1: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4504s; samplesPerSecond = 5684.3 @@ -1183,7 +1183,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3515s; samplesPerSecond = 7576.5 MPI Rank 1: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6863s; samplesPerSecond = 6072.6 MPI Rank 1: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05591s @@ -1720,7 +1720,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:07:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5710s; samplesPerSecond = 4483.5 MPI Rank 2: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7894s; samplesPerSecond = 3242.9 MPI Rank 2: 08/16/2016 10:07:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4520s; samplesPerSecond = 5664.3 @@ -1734,7 +1734,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:07:54: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3532s; samplesPerSecond = 7567.4 MPI Rank 2: 08/16/2016 10:07:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6847s; samplesPerSecond = 6078.4 MPI Rank 2: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05602s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt index e9830d2d0..28ae547c6 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt @@ -617,7 +617,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1484s; samplesPerSecond = 17245.9 MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17868.6 MPI Rank 0: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18039.2 @@ -632,7 +632,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2875s; samplesPerSecond = 35614.4 MPI Rank 0: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36224.7 MPI Rank 0: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.577049s @@ -1171,7 +1171,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1487s; samplesPerSecond = 17211.0 MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17870.0 MPI Rank 1: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18038.8 @@ -1185,7 +1185,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2878s; samplesPerSecond = 35576.6 MPI Rank 1: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36218.6 MPI Rank 1: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.576897s @@ -1723,7 +1723,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1485s; samplesPerSecond = 17237.9 MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1432s; samplesPerSecond = 17878.8 MPI Rank 2: 08/16/2016 10:08:09: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18040.0 @@ -1737,7 +1737,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2880s; samplesPerSecond = 35554.3 MPI Rank 2: 08/16/2016 10:08:10: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2824s; samplesPerSecond = 36264.2 MPI Rank 2: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.57713s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt index 91f02ba14..9b2f8ac04 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt @@ -614,7 +614,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3458s; samplesPerSecond = 7402.6 MPI Rank 0: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3452s; samplesPerSecond = 7416.2 MPI Rank 0: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.5 @@ -629,7 +629,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.6987s; samplesPerSecond = 14654.8 MPI Rank 0: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6563s; samplesPerSecond = 15601.8 MPI Rank 0: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38257s @@ -1168,7 +1168,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3464s; samplesPerSecond = 7390.3 MPI Rank 1: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.0 MPI Rank 1: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3528s; samplesPerSecond = 7255.6 @@ -1182,7 +1182,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7010s; samplesPerSecond = 14607.7 MPI Rank 1: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6562s; samplesPerSecond = 15604.6 MPI Rank 1: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38447s @@ -1720,7 +1720,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:07: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3462s; samplesPerSecond = 7394.9 MPI Rank 2: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.9 MPI Rank 2: 08/16/2016 03:19:08: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.6 @@ -1734,7 +1734,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7014s; samplesPerSecond = 14599.6 MPI Rank 2: 08/16/2016 03:19:11: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6564s; samplesPerSecond = 15599.2 MPI Rank 2: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38407s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt index 7dc5b9edb..c4a3616a0 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt @@ -615,7 +615,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2084s; samplesPerSecond = 12286.1 MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1804s; samplesPerSecond = 14191.5 MPI Rank 0: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1704s; samplesPerSecond = 15022.6 @@ -630,7 +630,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3564s; samplesPerSecond = 28732.2 MPI Rank 0: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31728.4 MPI Rank 0: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689726s @@ -1170,7 +1170,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2063s; samplesPerSecond = 12411.0 MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1808s; samplesPerSecond = 14158.0 MPI Rank 1: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15049.2 @@ -1184,7 +1184,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3534s; samplesPerSecond = 28972.3 MPI Rank 1: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31731.1 MPI Rank 1: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.690282s @@ -1723,7 +1723,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2089s; samplesPerSecond = 12254.0 MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1802s; samplesPerSecond = 14210.1 MPI Rank 2: 08/16/2016 03:19:32: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15046.4 @@ -1737,7 +1737,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:19:33: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3569s; samplesPerSecond = 28689.5 MPI Rank 2: 08/16/2016 03:19:34: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3228s; samplesPerSecond = 31727.1 MPI Rank 2: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689913s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml index 798652303..ae6c6be48 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml @@ -34,7 +34,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 1 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 1 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt index 0594c5a44..0adf7c7d8 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -622,7 +622,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.080039 MPI Rank 0: Async gradient aggregation wait time: 9e-06 MPI Rank 0: Actual gradient aggregation time: 0.025201 @@ -670,7 +670,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.011011 MPI Rank 0: Actual gradient aggregation time: 0.088497 MPI Rank 0: Async gradient aggregation wait time: 0.026596 @@ -687,7 +687,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 MPI Rank 0: Actual gradient aggregation time: 0.023009 MPI Rank 0: Async gradient aggregation wait time: 1e-05 @@ -1240,7 +1240,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.092054 MPI Rank 1: Async gradient aggregation wait time: 0.029108 MPI Rank 1: Actual gradient aggregation time: 0.053094 @@ -1287,7 +1287,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.009871 MPI Rank 1: Actual gradient aggregation time: 0.084551 MPI Rank 1: Async gradient aggregation wait time: 0.067075 @@ -1303,7 +1303,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.162303 MPI Rank 1: Actual gradient aggregation time: 0.088365 MPI Rank 1: Async gradient aggregation wait time: 0.357011 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.124401 MPI Rank 2: Async gradient aggregation wait time: 0.027767 MPI Rank 2: Actual gradient aggregation time: 0.053848 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 MPI Rank 2: Actual gradient aggregation time: 0.034828 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.144867 MPI Rank 2: Actual gradient aggregation time: 0.087324 MPI Rank 2: Async gradient aggregation wait time: 0.337574 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt index 0a6e05c57..07ddc0a67 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.006881 MPI Rank 0: Async gradient aggregation wait time: 0.001169 MPI Rank 0: Actual gradient aggregation time: 0.012812 @@ -671,7 +671,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003256 MPI Rank 0: Actual gradient aggregation time: 0.026681 MPI Rank 0: Async gradient aggregation wait time: 0.001712 @@ -688,7 +688,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.001962 MPI Rank 0: Actual gradient aggregation time: 0.02659 MPI Rank 0: Async gradient aggregation wait time: 0.003671 @@ -1242,7 +1242,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.017293 MPI Rank 1: Async gradient aggregation wait time: 0.001855 MPI Rank 1: Actual gradient aggregation time: 0.011879 @@ -1289,7 +1289,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.007435 MPI Rank 1: Actual gradient aggregation time: 0.028784 MPI Rank 1: Async gradient aggregation wait time: 0.006185 @@ -1305,7 +1305,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.001906 MPI Rank 1: Actual gradient aggregation time: 0.027016 MPI Rank 1: Async gradient aggregation wait time: 0.003939 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.014665 MPI Rank 2: Async gradient aggregation wait time: 0.001294 MPI Rank 2: Actual gradient aggregation time: 0.011743 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 3e-06 MPI Rank 2: Actual gradient aggregation time: 0.022531 MPI Rank 2: Async gradient aggregation wait time: 0.011564 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.015928 MPI Rank 2: Actual gradient aggregation time: 0.027468 MPI Rank 2: Async gradient aggregation wait time: 0.001119 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index 3fb4d83a9..559a9e0f8 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -620,7 +620,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.017461 MPI Rank 0: Async gradient aggregation wait time: 0.004531 MPI Rank 0: Actual gradient aggregation time: 0.021009 @@ -668,7 +668,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 3e-006 MPI Rank 0: Actual gradient aggregation time: 0.020512 MPI Rank 0: Async gradient aggregation wait time: 3e-006 @@ -685,7 +685,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 5e-006 MPI Rank 0: Actual gradient aggregation time: 0.018185 MPI Rank 0: Async gradient aggregation wait time: 4e-006 @@ -1239,7 +1239,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.029656 MPI Rank 1: Async gradient aggregation wait time: 0.007273 MPI Rank 1: Actual gradient aggregation time: 0.021183 @@ -1286,7 +1286,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 2e-006 MPI Rank 1: Actual gradient aggregation time: 0.039428 MPI Rank 1: Async gradient aggregation wait time: 7e-006 @@ -1302,7 +1302,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 4e-006 MPI Rank 1: Actual gradient aggregation time: 0.032424 MPI Rank 1: Async gradient aggregation wait time: 0.002787 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.032204 MPI Rank 2: Async gradient aggregation wait time: 0.010081 MPI Rank 2: Actual gradient aggregation time: 0.021164 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.019786 MPI Rank 2: Actual gradient aggregation time: 0.040852 MPI Rank 2: Async gradient aggregation wait time: 0.024007 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.011905 MPI Rank 2: Actual gradient aggregation time: 0.051704 MPI Rank 2: Async gradient aggregation wait time: 0.015128 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index 9e7826db3..a66d30a6c 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -621,7 +621,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.021385 MPI Rank 0: Async gradient aggregation wait time: 0.006373 MPI Rank 0: Actual gradient aggregation time: 0.017647 @@ -669,7 +669,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.014735 MPI Rank 0: Actual gradient aggregation time: 0.03433 MPI Rank 0: Async gradient aggregation wait time: 0.004733 @@ -686,7 +686,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.004776 MPI Rank 0: Actual gradient aggregation time: 0.028351 MPI Rank 0: Async gradient aggregation wait time: 0.008151 @@ -1241,7 +1241,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.016814 MPI Rank 1: Async gradient aggregation wait time: 0.004995 MPI Rank 1: Actual gradient aggregation time: 0.018553 @@ -1288,7 +1288,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.010824 MPI Rank 1: Actual gradient aggregation time: 0.034649 MPI Rank 1: Async gradient aggregation wait time: 0.018618 @@ -1304,7 +1304,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.006331 MPI Rank 1: Actual gradient aggregation time: 0.028676 MPI Rank 1: Async gradient aggregation wait time: 0.007827 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.035327 MPI Rank 2: Async gradient aggregation wait time: 0.00284 MPI Rank 2: Actual gradient aggregation time: 0.018497 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 1e-006 MPI Rank 2: Actual gradient aggregation time: 0.016322 MPI Rank 2: Async gradient aggregation wait time: 0.013477 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.024966 MPI Rank 2: Actual gradient aggregation time: 0.028835 MPI Rank 2: Async gradient aggregation wait time: 0.002866 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml index 2ebe96128..2943c8c42 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml @@ -34,8 +34,8 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 1 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 1 - distributed reading is ENABLED - BufferedAsyncGradientAggregation is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt index 9ded86dae..e30bc3418 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt @@ -571,7 +571,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2660s; samplesPerSecond = 2405.9 MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1966s; samplesPerSecond = 3255.6 MPI Rank 0: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4635s; samplesPerSecond = 1380.7 @@ -610,7 +610,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5967s; samplesPerSecond = 4290.2 MPI Rank 0: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3821s; samplesPerSecond = 6699.7 MPI Rank 0: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6943s; samplesPerSecond = 3686.9 @@ -625,7 +625,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9884s; samplesPerSecond = 10360.6 MPI Rank 0: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.7 MPI Rank 0: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39916s @@ -1119,7 +1119,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2613s; samplesPerSecond = 2449.3 MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.6 MPI Rank 1: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4674s; samplesPerSecond = 1369.3 @@ -1157,7 +1157,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5922s; samplesPerSecond = 4323.0 MPI Rank 1: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3859s; samplesPerSecond = 6634.3 MPI Rank 1: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6904s; samplesPerSecond = 3707.8 @@ -1171,7 +1171,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9880s; samplesPerSecond = 10364.1 MPI Rank 1: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.5 MPI Rank 1: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39895s @@ -1664,7 +1664,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2618s; samplesPerSecond = 2444.4 MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.9 MPI Rank 2: 08/16/2016 09:59:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4698s; samplesPerSecond = 1362.4 @@ -1702,7 +1702,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5955s; samplesPerSecond = 4298.6 MPI Rank 2: 08/16/2016 10:00:03: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3830s; samplesPerSecond = 6684.6 MPI Rank 2: 08/16/2016 10:00:04: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6922s; samplesPerSecond = 3698.2 @@ -1716,7 +1716,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:07: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9897s; samplesPerSecond = 10346.1 MPI Rank 2: 08/16/2016 10:00:08: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4041s; samplesPerSecond = 7292.7 MPI Rank 2: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39757s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt index 9fe00d658..c3eb64dd3 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt @@ -572,7 +572,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0693s; samplesPerSecond = 9238.4 MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0567s; samplesPerSecond = 11281.5 MPI Rank 0: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0566s; samplesPerSecond = 11312.8 @@ -611,7 +611,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41819.8 MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0599s; samplesPerSecond = 42761.5 MPI Rank 0: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42762.2 @@ -626,7 +626,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0894s; samplesPerSecond = 114526.0 MPI Rank 0: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122653.8 MPI Rank 0: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179159s @@ -1121,7 +1121,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0695s; samplesPerSecond = 9204.8 MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0563s; samplesPerSecond = 11359.2 MPI Rank 1: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0567s; samplesPerSecond = 11286.5 @@ -1159,7 +1159,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0614s; samplesPerSecond = 41699.9 MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42846.6 MPI Rank 1: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42717.2 @@ -1173,7 +1173,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0900s; samplesPerSecond = 113825.8 MPI Rank 1: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0832s; samplesPerSecond = 123133.2 MPI Rank 1: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179295s @@ -1667,7 +1667,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0697s; samplesPerSecond = 9180.2 MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0565s; samplesPerSecond = 11323.4 MPI Rank 2: 08/16/2016 10:00:15: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0568s; samplesPerSecond = 11270.0 @@ -1705,7 +1705,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41826.0 MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42906.2 MPI Rank 2: 08/16/2016 10:00:17: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0601s; samplesPerSecond = 42627.6 @@ -1719,7 +1719,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0899s; samplesPerSecond = 113873.9 MPI Rank 2: 08/16/2016 10:00:18: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122592.2 MPI Rank 2: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179288s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt index fd8b1591a..05ee13326 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt @@ -569,7 +569,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7677.6 MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0883s; samplesPerSecond = 7250.5 MPI Rank 0: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0900s; samplesPerSecond = 7107.4 @@ -608,7 +608,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1410s; samplesPerSecond = 18159.2 MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1391s; samplesPerSecond = 18410.2 MPI Rank 0: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19084.0 @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3803s; samplesPerSecond = 26929.2 MPI Rank 0: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28721.0 MPI Rank 0: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746914s @@ -1118,7 +1118,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7671.1 MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7260.9 MPI Rank 1: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7105.9 @@ -1156,7 +1156,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1412s; samplesPerSecond = 18128.6 MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18430.0 MPI Rank 1: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1342s; samplesPerSecond = 19081.7 @@ -1170,7 +1170,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3812s; samplesPerSecond = 26864.6 MPI Rank 1: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3564s; samplesPerSecond = 28728.9 MPI Rank 1: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746915s @@ -1664,7 +1664,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0829s; samplesPerSecond = 7722.7 MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7266.9 MPI Rank 2: 08/16/2016 03:02:57: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7102.3 @@ -1702,7 +1702,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1413s; samplesPerSecond = 18119.0 MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18436.9 MPI Rank 2: 08/16/2016 03:03:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19089.9 @@ -1716,7 +1716,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3813s; samplesPerSecond = 26856.3 MPI Rank 2: 08/16/2016 03:03:02: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28725.1 MPI Rank 2: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746861s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt index e5b7443d1..f65e7e04a 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt @@ -570,7 +570,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1000s; samplesPerSecond = 6400.3 MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0738s; samplesPerSecond = 8674.2 MPI Rank 0: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8396.9 @@ -609,7 +609,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1045s; samplesPerSecond = 24493.4 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0862s; samplesPerSecond = 29707.7 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0768s; samplesPerSecond = 33337.7 @@ -624,7 +624,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1518s; samplesPerSecond = 67460.3 MPI Rank 0: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1251s; samplesPerSecond = 81832.3 MPI Rank 0: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287003s @@ -1120,7 +1120,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0996s; samplesPerSecond = 6427.7 MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0742s; samplesPerSecond = 8629.6 MPI Rank 1: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8393.6 @@ -1158,7 +1158,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1047s; samplesPerSecond = 24439.6 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0858s; samplesPerSecond = 29832.0 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0767s; samplesPerSecond = 33361.1 @@ -1172,7 +1172,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1520s; samplesPerSecond = 67362.2 MPI Rank 1: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81779.3 MPI Rank 1: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287153s @@ -1667,7 +1667,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625 effective momentum = 0.900000 momentum as time constant = 607.4 samples MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 1- 10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0986s; samplesPerSecond = 6490.4 MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 11- 20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0740s; samplesPerSecond = 8642.9 MPI Rank 2: 08/16/2016 03:03:17: Epoch[ 1 of 3]-Minibatch[ 21- 30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0763s; samplesPerSecond = 8383.7 @@ -1705,7 +1705,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1034s; samplesPerSecond = 24758.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 11- 20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0861s; samplesPerSecond = 29745.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 2 of 3]-Minibatch[ 21- 30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0769s; samplesPerSecond = 33306.0 @@ -1719,7 +1719,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED. MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 1- 10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1506s; samplesPerSecond = 67980.7 MPI Rank 2: 08/16/2016 03:03:20: Epoch[ 3 of 3]-Minibatch[ 11- 20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81781.3 MPI Rank 2: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.286844s diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml index 6323cfd83..950551373 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml @@ -34,7 +34,7 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 32 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 32 - distributed reading is ENABLED diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt index 9f8871d37..1225d8f56 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt @@ -622,7 +622,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.008473 MPI Rank 0: Async gradient aggregation wait time: 0.00554 MPI Rank 0: Actual gradient aggregation time: 0.020395 @@ -670,7 +670,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 1.1e-05 MPI Rank 0: Actual gradient aggregation time: 0.009207 MPI Rank 0: Async gradient aggregation wait time: 9e-06 @@ -687,7 +687,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.0046 MPI Rank 0: Actual gradient aggregation time: 0.069203 MPI Rank 0: Async gradient aggregation wait time: 0.041271 @@ -1240,7 +1240,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.063039 MPI Rank 1: Async gradient aggregation wait time: 0.022678 MPI Rank 1: Actual gradient aggregation time: 0.025025 @@ -1287,7 +1287,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.142203 MPI Rank 1: Actual gradient aggregation time: 0.157984 MPI Rank 1: Async gradient aggregation wait time: 0.179014 @@ -1303,7 +1303,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.04561 MPI Rank 1: Actual gradient aggregation time: 0.077514 MPI Rank 1: Async gradient aggregation wait time: 0.069416 @@ -1855,7 +1855,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.041047 MPI Rank 2: Async gradient aggregation wait time: 8e-06 MPI Rank 2: Actual gradient aggregation time: 0.004986 @@ -1902,7 +1902,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 9e-06 MPI Rank 2: Actual gradient aggregation time: 0.149714 MPI Rank 2: Async gradient aggregation wait time: 0.120817 @@ -1918,7 +1918,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 9e-06 MPI Rank 2: Actual gradient aggregation time: 0.002705 MPI Rank 2: Async gradient aggregation wait time: 9e-06 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt index 9c197c4b6..73ffe0d87 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt @@ -623,7 +623,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.01227 MPI Rank 0: Async gradient aggregation wait time: 0.00776 MPI Rank 0: Actual gradient aggregation time: 0.011351 @@ -671,7 +671,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.000306 MPI Rank 0: Actual gradient aggregation time: 0.017813 MPI Rank 0: Async gradient aggregation wait time: 0.003066 @@ -688,7 +688,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003211 MPI Rank 0: Actual gradient aggregation time: 0.026824 MPI Rank 0: Async gradient aggregation wait time: 0.002719 @@ -1242,7 +1242,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.0237 MPI Rank 1: Async gradient aggregation wait time: 0.00528 MPI Rank 1: Actual gradient aggregation time: 0.011811 @@ -1289,7 +1289,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.013339 MPI Rank 1: Actual gradient aggregation time: 0.032552 MPI Rank 1: Async gradient aggregation wait time: 0.007984 @@ -1305,7 +1305,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.002841 MPI Rank 1: Actual gradient aggregation time: 0.027316 MPI Rank 1: Async gradient aggregation wait time: 0.002301 @@ -1858,7 +1858,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.028506 MPI Rank 2: Async gradient aggregation wait time: 0.004749 MPI Rank 2: Actual gradient aggregation time: 0.011317 @@ -1905,7 +1905,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.003625 MPI Rank 2: Actual gradient aggregation time: 0.032563 MPI Rank 2: Async gradient aggregation wait time: 0.012159 @@ -1921,7 +1921,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.016234 MPI Rank 2: Actual gradient aggregation time: 0.026881 MPI Rank 2: Async gradient aggregation wait time: 0.005475 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt index 96d004c10..7b5f621ba 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt @@ -617,7 +617,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.010875 MPI Rank 0: Async gradient aggregation wait time: 0.046041 MPI Rank 0: Actual gradient aggregation time: 0.070247 @@ -665,7 +665,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 2e-006 MPI Rank 0: Actual gradient aggregation time: 0.106492 MPI Rank 0: Async gradient aggregation wait time: 2e-006 @@ -682,7 +682,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 3e-006 MPI Rank 0: Actual gradient aggregation time: 0.010023 MPI Rank 0: Async gradient aggregation wait time: 3e-006 @@ -1233,7 +1233,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.033393 MPI Rank 1: Async gradient aggregation wait time: 0.005092 MPI Rank 1: Actual gradient aggregation time: 0.070288 @@ -1280,7 +1280,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.056538 MPI Rank 1: Actual gradient aggregation time: 0.181181 MPI Rank 1: Async gradient aggregation wait time: 3e-006 @@ -1296,7 +1296,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 2e-006 MPI Rank 1: Actual gradient aggregation time: 0.028628 MPI Rank 1: Async gradient aggregation wait time: 0.062352 @@ -1846,7 +1846,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.040872 MPI Rank 2: Async gradient aggregation wait time: 0.04797 MPI Rank 2: Actual gradient aggregation time: 0.070448 @@ -1893,7 +1893,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.103418 MPI Rank 2: Actual gradient aggregation time: 0.168332 MPI Rank 2: Async gradient aggregation wait time: 0.014615 @@ -1909,7 +1909,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 3e-006 MPI Rank 2: Actual gradient aggregation time: 0.092817 MPI Rank 2: Async gradient aggregation wait time: 0.095403 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt index 05ca68e72..9382da138 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt @@ -618,7 +618,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Actual gradient aggregation time: 0.01782 MPI Rank 0: Async gradient aggregation wait time: 0.005297 MPI Rank 0: Actual gradient aggregation time: 0.025182 @@ -666,7 +666,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.003725 MPI Rank 0: Actual gradient aggregation time: 0.069103 MPI Rank 0: Async gradient aggregation wait time: 0.001861 @@ -683,7 +683,7 @@ MPI Rank 0: MPI Rank 0: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses MPI Rank 0: -MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 0: Async gradient aggregation wait time: 0.033541 MPI Rank 0: Actual gradient aggregation time: 0.07365 MPI Rank 0: Async gradient aggregation wait time: 0.011228 @@ -1235,7 +1235,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Actual gradient aggregation time: 0.020729 MPI Rank 1: Async gradient aggregation wait time: 0.009212 MPI Rank 1: Actual gradient aggregation time: 0.025214 @@ -1282,7 +1282,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.061455 MPI Rank 1: Actual gradient aggregation time: 0.070776 MPI Rank 1: Async gradient aggregation wait time: 0.04993 @@ -1298,7 +1298,7 @@ MPI Rank 1: MPI Rank 1: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses MPI Rank 1: -MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 1: Async gradient aggregation wait time: 0.006886 MPI Rank 1: Actual gradient aggregation time: 0.071953 MPI Rank 1: Async gradient aggregation wait time: 0.012085 @@ -1849,7 +1849,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953 effective momentum = 0.656119 momentum as time constant = 607.5 samples MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Actual gradient aggregation time: 0.03301 MPI Rank 2: Async gradient aggregation wait time: 0.004502 MPI Rank 2: Actual gradient aggregation time: 0.025447 @@ -1896,7 +1896,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.001929 MPI Rank 2: Actual gradient aggregation time: 0.069767 MPI Rank 2: Async gradient aggregation wait time: 0.051731 @@ -1912,7 +1912,7 @@ MPI Rank 2: MPI Rank 2: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098 effective momentum = 0.656119 momentum as time constant = 2429.9 samples MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses MPI Rank 2: -MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. +MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED. MPI Rank 2: Async gradient aggregation wait time: 0.005577 MPI Rank 2: Actual gradient aggregation time: 0.072623 MPI Rank 2: Async gradient aggregation wait time: 0.000919 diff --git a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml index 9bc9eb86e..abbd68e8b 100644 --- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml +++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml @@ -34,8 +34,8 @@ testCases: - ^MPI Rank {{integer}} - Starting minibatch loop - DataParallelSGD training - - MyRank = {{integer}} - - NumNodes = 3 - - NumGradientBits = 64 + - myRank = {{integer}} + - numNodes = 3 + - numGradientBits = 64 - distributed reading is ENABLED - BufferedAsyncGradientAggregation is ENABLED diff --git a/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk b/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk index aec775deb..abfe7f1fc 100644 --- a/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk +++ b/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk @@ -98,7 +98,7 @@ TrainConvNet = { parallelizationMethod = "DataParallelSGD" parallelizationStartEpoch = 1 distributedMBReading = true - dataParallelSGD = { gradientBits = 2 } + dataParallelSGD = { gradientBits = 2:1 } } AutoAdjust = { autoAdjustMinibatch = true # enable automatic growing of minibatch size