Integrate fseide/samplebs into master

2016-09-20 16:23:36 -07:00 · 2016-09-20 16:23:36 -07:00 · 7c5fb2d7d8
--- a/Source/SGDLib/SGD.cpp
+++ b/Source/SGDLib/SGD.cpp
@ -141,15 +141,15 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
    else
    {
        LOGPRINTF(stderr, "Training criteria:\n");
-    for (const auto& node : criterionNodes)
-    {
-        LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str());
-    }
-    if (criterionNodes.empty())
-    {
-        LOGPRINTF(stderr, "\t(none)\n");
-        InvalidArgument("TrainOrAdaptModel: No criterion node was specified.");
-    }
+        for (const auto& node : criterionNodes)
+        {
+            LOGPRINTF(stderr, "\t%ls = %ls\n", node->NodeName().c_str(), node->OperationName().c_str());
+        }
+        if (criterionNodes.empty())
+        {
+            LOGPRINTF(stderr, "\t(none)\n");
+            InvalidArgument("TrainOrAdaptModel: No criterion node was specified.");
+        }
    }

    // determine evaluationNodes from GetEvalCriterionNodes(), ensuring each criterion is only logged once
@ -277,10 +277,10 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
    {
        fprintf(stderr, "out of %d parameter tensors and %d nodes with gradient:\n\n",
            (int)learnableNodes.size(), (int)numNeedsGradient);
-    for (let nodeDescription : nodesToUpdateDescriptions)
-    {
-        LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str());
-    }
+        for (let nodeDescription : nodesToUpdateDescriptions)
+        {
+            LOGPRINTF(stderr, "\t%ls\n", nodeDescription.c_str());
+        }
    }

    // one blank line before training progress log
@ -302,16 +302,20 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
    for (int i = 0; i < m_numPrevLearnRates; i++)
        prevLearnRates[i] = -1.0;

+    m_prevChosenMinibatchSize = m_mbSize[startEpoch];
+
+    int currentNumGradientBits = 0; // this remembers the last #gradient bits we set for dataParallelSGD (init val 0 has no meaning, just keep compiler happy)
    if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD)
    {
-        InitDistGradAgg(evaluationNodes.size(), m_traceLevel);
+        currentNumGradientBits = m_numGradientBits[startEpoch]; // remember so that we can detect a change
+        InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel);
    }
    else if (GetParallelizationMethod() == ParallelizationMethod::modelAveragingSGD || 
             GetParallelizationMethod() == ParallelizationMethod::blockMomentumSGD)
    {
        InitModelAggregationHandler(m_syncStatsTrace, net->GetDeviceId());
    }
-    
+
    // precompute mean and invStdDev nodes and save initial model
    // When no precompute, only save if we did not load the model from a 
    // checkpoint but instead built it from a network description
@ -385,6 +389,14 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
            m_mpi->WaitAll();
        }

+        // (re-)initialize 1-bit SGD
+        if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD &&
+            currentNumGradientBits != m_numGradientBits[i])
+        {
+            currentNumGradientBits = m_numGradientBits[i];
+            InitDistGradAgg(evaluationNodes.size(), currentNumGradientBits, m_traceLevel);
+        }
+
        Timer timer;
        timer.Start();

@ -464,6 +476,8 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,
                                                          criterionNodes, evaluationNodes,
                                                          inputMatrices, learnableNodes,
                                                          smoothedGradients, smoothedCounts, learningRateAdjustmentFactor);
+            if (m_traceLevel < 1 && chosenMinibatchSize != m_prevChosenMinibatchSize)
+                LOGPRINTF(stderr, "Minibatch size adapted to %d.\n", (int)chosenMinibatchSize);
            m_prevChosenMinibatchSize = chosenMinibatchSize;
        }
        else
@ -476,9 +490,11 @@ void SGD<ElemType>::TrainOrAdaptModel(int startEpoch, ComputationNetworkPtr net,

        double momentumPerSample = GetMomentumPerSample(i /*BUGBUG workaround:*/, trainSetDataReader->GetNumParallelSequencesForFixingBPTTMode());
        // time constant = number of samples after which a contribution has been reduced to e^-1
-        double momentumAsTimeConstant = momentumPerSample == 0.0 ? 0.0
-                                                                 : momentumPerSample >= 1.0 ? 0.0
-                                                                                            : -1.0 / log(momentumPerSample);
+        double momentumAsTimeConstant = momentumPerSample == 0.0
+                                        ? 0.0
+                                        : momentumPerSample >= 1.0
+                                            ? 0.0
+                                            : -1.0 / log(momentumPerSample);
        if (m_traceLevel > 0)
        {
            fprintf(stderr, "\n");
@ -863,8 +879,8 @@ size_t SGD<ElemType>::TrainOneEpoch(ComputationNetworkPtr net,
        LOGPRINTF(stderr, "Starting minibatch loop");
        if (useGradientAggregation)
        {
-            fprintf(stderr, ", DataParallelSGD training (MyRank = %d, NumNodes = %d, NumGradientBits = %d)",
-                    (int) m_mpi->CurrentNodeRank(), (int) m_mpi->NumNodesInUse(), (int) m_numGradientBits);
+            fprintf(stderr, ", DataParallelSGD training (myRank = %d, numNodes = %d, numGradientBits = %d)",
+                    (int) m_mpi->CurrentNodeRank(), (int) m_mpi->NumNodesInUse(), (int) m_numGradientBits[epochNumber]);

            if (m_bufferedAsyncGradientAggregation)
                fprintf(stderr, ", BufferedAsyncGradientAggregation is ENABLED");
@ -1754,8 +1770,8 @@ size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
    LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Evaluating minibatchSizes %d..%d\n",
        (int)epochNumber + 1, (int)RoundToMultipleOf64(minMinibatchSize), (int)RoundToMultipleOf64(maxMinibatchSize));

-    size_t lastTriedTrialMinibatchSize = 0;
-    EpochCriterion lastTriedTrialEpochCriterion(0);
+    size_t lastGoodMinibatchSize = 0;
+    EpochCriterion lastGoodEpochCriterion(0);
    for (float trialMinibatchSizeFloat = (float) minMinibatchSize;
         trialMinibatchSizeFloat <= maxMinibatchSize;
         trialMinibatchSizeFloat *= minibatchSizeTuningFactor)
@ -1786,15 +1802,15 @@ size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
            // for the first iteration of the loop only, set baseCriterion
            // to the result we got from TrainOneMiniEpochAndReloadModel().
            baseCriterion = epochCriterion;
-            lastTriedTrialMinibatchSize = trialMinibatchSize;
-            lastTriedTrialEpochCriterion = baseCriterion;
+            lastGoodMinibatchSize = trialMinibatchSize;
+            lastGoodEpochCriterion = baseCriterion;
            isFirstIteration = false;

            if (m_traceLevel > 0)
            {
                LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Computed baseCriterion %.8f for minibatchSize=%d\n",
                          (int)epochNumber + 1, baseCriterion.Average(), (int)trialMinibatchSize);
-        }
+            }
        }
        else if (!epochCriterion.IsNan() &&
                 epochCriterion.Average() > (baseCriterion.Average() * (1.0 + (m_minibatchSearchCriterionErrorMargin / 100.0))))
@ -1807,8 +1823,8 @@ size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
        }
        else
        {
-            lastTriedTrialMinibatchSize = trialMinibatchSize;
-            lastTriedTrialEpochCriterion = epochCriterion;
+            lastGoodMinibatchSize = trialMinibatchSize;
+            lastGoodEpochCriterion = epochCriterion;
            if (m_traceLevel > 0 && trialMinibatchSizeFloat * minibatchSizeTuningFactor <= maxMinibatchSize)
            {
                LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Keep searching... epochCriterion = %.8f vs. baseCriterion = %.8f\n",
@ -1816,10 +1832,12 @@ size_t SGD<ElemType>::SearchForBestMinibatchSize(ComputationNetworkPtr net,
            }
        }
    }
-    LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. epochCriterion = %.8f vs baseCriterion = %.8f\n",
-              (int)epochNumber+1, (int) lastTriedTrialMinibatchSize, lastTriedTrialEpochCriterion.Average(), baseCriterion.Average());
-
-    return lastTriedTrialMinibatchSize;
+    if (m_traceLevel > 0)
+    {
+        LOGPRINTF(stderr, " AdaptiveMinibatchSearch Epoch[%d]: Search successful. New minibatchSize is %d. epochCriterion = %.8f vs baseCriterion = %.8f\n",
+                  (int)epochNumber + 1, (int)lastGoodMinibatchSize, lastGoodEpochCriterion.Average(), baseCriterion.Average());
+    }
+    return lastGoodMinibatchSize;
 }

 // run training over a small subset of an epoch, used by automatic LR and MB-size tuning
@ -1905,31 +1923,24 @@ void SGD<ElemType>::AttemptUtteranceDerivativeFeatures(ComputationNetworkPtr net
 }

 template <class ElemType>
-void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int traceLevel)
+void SGD<ElemType>::InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel)
 {
-    if (GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD)
-    {
-        if (m_distGradAgg == nullptr)
-        {
+    assert(GetParallelizationMethod() == ParallelizationMethod::dataParallelSGD);
+    if (traceLevel > 0)
+        fprintf(stderr, "Initializing dataParallelSGD for %d-bit quantization.\n", numGradientBits);
+
 #ifdef CNTK_PARALLEL_TRAINING_SUPPORT
-            m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(m_mpi, m_numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
+    m_distGradAgg = std::make_shared<AllReduceDistGradAggregator<ElemType>>(m_mpi, numGradientBits, m_zeroThresholdFor1Bit, true /*useQuantizationForSelfStripe*/, m_bufferedAsyncGradientAggregation, traceLevel, m_syncStatsTrace);
 #else
-            if (m_numGradientBits != (8 * sizeof(ElemType)))
-            {
-                RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!");
-            }
-
-            m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace);
-#endif // !CNTK_PARALLEL_TRAINING_SUPPORT
-        }
-
-        if (m_gradHeader == nullptr)
-        {
-            m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) {
-                DistGradHeader::Destroy(ptr);
-            });
-        }
+    if (numGradientBits != (8 * sizeof(ElemType)))
+    {
+        RuntimeError("Gradient quantization is unsupported in CNTK binaries built without quantized gradient aggregation support!");
    }
+
+    m_distGradAgg = std::make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, m_bufferedAsyncGradientAggregation, m_syncStatsTrace);
+#endif // !CNTK_PARALLEL_TRAINING_SUPPORT
+
+    m_gradHeader.reset(DistGradHeader::Create(numEvalNodes), [](DistGradHeader* ptr) { DistGradHeader::Destroy(ptr); });
 }

 template <class ElemType>
@ -2651,7 +2662,7 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)

    // parallel training
    m_parallelizationMethod = ParallelizationMethod::none;
-    m_numGradientBits = 32;
+    m_numGradientBits = vector<int>{8 * (int)sizeofElemType}; // means no quantization
    m_zeroThresholdFor1Bit = true;
    m_bufferedAsyncGradientAggregation = false;
    m_enableDistributedMBReading = false;
@ -2682,13 +2693,14 @@ SGDParams::SGDParams(const ConfigRecordType& configSGD, size_t sizeofElemType)
        if (configParallelTrain.Exists(L"DataParallelSGD"))
        {
            const ConfigRecordType& configDataParallelSGD(configParallelTrain(L"DataParallelSGD", ConfigRecordType::Record()));
-            size_t defaultGradientBits = 8 * sizeofElemType;
-            m_numGradientBits = configDataParallelSGD(L"gradientBits", defaultGradientBits);
+            let defaultGradientBits = 8 * (int)sizeofElemType;
+            m_numGradientBits = configDataParallelSGD(L"gradientBits", ConfigRecordType::Array(intargvector(vector<int>{defaultGradientBits})));
            m_zeroThresholdFor1Bit = configDataParallelSGD(L"useZeroThresholdFor1BitQuantization", true);
            m_bufferedAsyncGradientAggregation = configDataParallelSGD(L"useBufferedAsyncGradientAggregation", false);
-                if ( m_numGradientBits < 1 || m_numGradientBits > (8 * sizeofElemType) )
+            for (size_t i = 0; i < m_numGradientBits.size(); i++)
            {
-                InvalidArgument("gradientBits must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double!");
+                if (m_numGradientBits[i] < 1 || m_numGradientBits[i] > defaultGradientBits)
+                    InvalidArgument("gradientBits values must be in the range [1, 32] when using precision=float and in range [1, 64] when using precision=double.");
            }
        }
        if (configParallelTrain.Exists(L"ModelAveragingSGD"))
--- a/Source/SGDLib/SGD.h
+++ b/Source/SGDLib/SGD.h
@ -264,7 +264,7 @@ protected:
    int m_syncStatsTrace;

    // Data parallel SGD training parameters
-    int m_numGradientBits;
+    intargvector m_numGradientBits;
    bool m_bufferedAsyncGradientAggregation;
    bool m_zeroThresholdFor1Bit;

@ -470,7 +470,7 @@ protected:
                         /*out*/ std::vector<EpochCriterion>& epochEvalErrors,
                         const std::string& prefixMsg = "");

-    void InitDistGradAgg(int numEvalNodes, int traceLevel);
+    void InitDistGradAgg(int numEvalNodes, int numGradientBits, int traceLevel);
    void InitModelAggregationHandler(int traceLevel, DEVICEID_TYPE devID);
 public:
    // UpdateWeights() - actual weight update, implementing various update rules
--- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt
+++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.cpu.txt
@ -545,7 +545,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl
 08/16/2016 10:01:28: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1

-08/16/2016 10:01:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 10:01:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 10:01:28:  Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50722371 * 250; EvalClassificationError = 0.14800000 * 250; time = 0.0397s; samplesPerSecond = 6295.5
 08/16/2016 10:01:28:  Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.45786101 * 250; EvalClassificationError = 0.12800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
 08/16/2016 10:01:28:  Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.37902995 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0178s; samplesPerSecond = 14020.5
@ -592,7 +592,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s
 08/16/2016 10:01:29: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1

-08/16/2016 10:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 10:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 10:01:29:  Epoch[ 3 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.18478506 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0216s; samplesPerSecond = 11585.3
 08/16/2016 10:01:29:  Epoch[ 3 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.12741733 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0184s; samplesPerSecond = 13576.6
 08/16/2016 10:01:29:  Epoch[ 3 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.17535235 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0235s; samplesPerSecond = 10656.9
--- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt
+++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.linux.gpu.txt
@ -546,7 +546,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl
 08/16/2016 10:01:33: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1

-08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 10:01:33:  Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.61550018 * 250; EvalClassificationError = 0.27600000 * 250; time = 0.0108s; samplesPerSecond = 23111.8
 08/16/2016 10:01:33:  Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.59409242 * 250; EvalClassificationError = 0.28800000 * 250; time = 0.0094s; samplesPerSecond = 26612.7
 08/16/2016 10:01:33:  Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.53884306 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0093s; samplesPerSecond = 26890.4
@ -593,7 +593,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s
 08/16/2016 10:01:33: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1

-08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 10:01:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 10:01:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.18398525 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25685.8
 08/16/2016 10:01:33:  Epoch[ 3 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.12825686 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0095s; samplesPerSecond = 26374.1
 08/16/2016 10:01:33:  Epoch[ 3 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.17547006 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0095s; samplesPerSecond = 26318.6
--- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.cpu.txt
@ -544,7 +544,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl
 08/16/2016 03:19:48: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1

-08/16/2016 03:19:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 03:19:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 03:19:48:  Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50509082 * 250; EvalClassificationError = 0.14400000 * 250; time = 0.0250s; samplesPerSecond = 9991.2
 08/16/2016 03:19:48:  Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.45891377 * 250; EvalClassificationError = 0.13200000 * 250; time = 0.0251s; samplesPerSecond = 9958.6
 08/16/2016 03:19:48:  Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.38371187 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
@ -591,7 +591,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s
 08/16/2016 03:19:49: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1

-08/16/2016 03:19:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 03:19:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 03:19:49:  Epoch[ 3 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.18436522 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0246s; samplesPerSecond = 10145.7
 08/16/2016 03:19:49:  Epoch[ 3 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.12821186 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0251s; samplesPerSecond = 9945.1
 08/16/2016 03:19:49:  Epoch[ 3 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.17512306 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0248s; samplesPerSecond = 10084.3
--- a/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Examples/Other/Simple2d/MultiGpu/baseline.windows.gpu.txt
@ -545,7 +545,7 @@ BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sampl
 08/16/2016 03:19:55: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at sample 10000), data subset 0 of 1

-08/16/2016 03:19:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 03:19:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 03:19:55:  Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.61550018 * 250; EvalClassificationError = 0.27600000 * 250; time = 0.0399s; samplesPerSecond = 6268.0
 08/16/2016 03:19:55:  Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.59409242 * 250; EvalClassificationError = 0.28800000 * 250; time = 0.0380s; samplesPerSecond = 6577.0
 08/16/2016 03:19:55:  Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.53884306 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0379s; samplesPerSecond = 6604.0
@ -592,7 +592,7 @@ BlockRandomizer::StartEpoch: epoch 1: frames [10000..20000] (first sequence at s
 08/16/2016 03:19:56: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 BlockRandomizer::StartEpoch: epoch 2: frames [20000..30000] (first sequence at sample 20000), data subset 0 of 1

-08/16/2016 03:19:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
+08/16/2016 03:19:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1).
 08/16/2016 03:19:56:  Epoch[ 3 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.18398525 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0376s; samplesPerSecond = 6641.3
 08/16/2016 03:19:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.12825686 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0376s; samplesPerSecond = 6653.0
 08/16/2016 03:19:56:  Epoch[ 3 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.17547006 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0374s; samplesPerSecond = 6692.7
--- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt
+++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.cpu.txt
@ -423,7 +423,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1
 08/16/2016 10:01:43: Starting Epoch 1: learning rate per sample = 0.003906  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses

-08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED.
 08/16/2016 10:01:44: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.45117986 * 2048; EvalClassificationError = 0.92187500 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.209966s
 08/16/2016 10:01:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
 08/16/2016 10:01:44: CNTKCommandTrainEnd: speechTrain
--- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt
+++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.linux.gpu.txt
@ -424,7 +424,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1
 08/16/2016 10:01:46: Starting Epoch 1: learning rate per sample = 0.003906  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses

-08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED.
 08/16/2016 10:01:46: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalClassificationError = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.023072s
 08/16/2016 10:01:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
 08/16/2016 10:01:46: CNTKCommandTrainEnd: speechTrain
--- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.cpu.txt
@ -422,7 +422,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1
 08/16/2016 03:20:15: Starting Epoch 1: learning rate per sample = 0.003906  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses

-08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED.
 08/16/2016 03:20:15: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.46427900 * 2048; EvalClassificationError = 0.91259766 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.28059s
 08/16/2016 03:20:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
 08/16/2016 03:20:15: CNTKCommandTrainEnd: speechTrain
--- a/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/baseline.windows.gpu.txt
@ -423,7 +423,7 @@ requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 1
 08/16/2016 03:20:21: Starting Epoch 1: learning rate per sample = 0.003906  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses

-08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 1, numGradientBits = 1), distributed reading is ENABLED.
 08/16/2016 03:20:21: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalClassificationError = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.05551s
 08/16/2016 03:20:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
 08/16/2016 03:20:21: CNTKCommandTrainEnd: speechTrain
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.cpu.txt
@ -477,7 +477,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1177s; samplesPerSecond = 2124.5
 MPI Rank 0: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0420s; samplesPerSecond = 5949.7
 MPI Rank 0: 08/16/2016 09:57:09:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0598s; samplesPerSecond = 4180.3
@ -523,7 +523,7 @@ MPI Rank 0: 08/16/2016 09:57:10: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.4
 MPI Rank 0: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5186.5
 MPI Rank 0: 08/16/2016 09:57:11:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5
@ -569,7 +569,7 @@ MPI Rank 0: 08/16/2016 09:57:12: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.8
 MPI Rank 0: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.4
 MPI Rank 0: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10940.4
@ -615,7 +615,7 @@ MPI Rank 0: 08/16/2016 09:57:14: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0885s; samplesPerSecond = 2825.7
 MPI Rank 0: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12260.9
 MPI Rank 0: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0417s; samplesPerSecond = 5988.2
@ -1025,7 +1025,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1176s; samplesPerSecond = 2125.0
 MPI Rank 1: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5982.7
 MPI Rank 1: 08/16/2016 09:57:09:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0600s; samplesPerSecond = 4163.8
@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6236.7
 MPI Rank 1: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.4
 MPI Rank 1: 08/16/2016 09:57:11:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5
@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.2
 MPI Rank 1: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.5
 MPI Rank 1: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10939.0
@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0886s; samplesPerSecond = 2823.0
 MPI Rank 1: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12265.1
 MPI Rank 1: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5987.6
@ -1569,7 +1569,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1176s; samplesPerSecond = 2126.1
 MPI Rank 2: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5983.7
 MPI Rank 2: 08/16/2016 09:57:09:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0602s; samplesPerSecond = 4149.9
@ -1614,7 +1614,7 @@ MPI Rank 2: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.5
 MPI Rank 2: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.5
 MPI Rank 2: 08/16/2016 09:57:11:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.4
@ -1659,7 +1659,7 @@ MPI Rank 2: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5677.9
 MPI Rank 2: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4728.4
 MPI Rank 2: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10940.4
@ -1704,7 +1704,7 @@ MPI Rank 2: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0885s; samplesPerSecond = 2823.4
 MPI Rank 2: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12262.1
 MPI Rank 2: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5988.0
@ -2113,7 +2113,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:08: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:08: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69938312 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1177s; samplesPerSecond = 2124.5
 MPI Rank 3: 08/16/2016 09:57:08:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71368781 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0418s; samplesPerSecond = 5982.9
 MPI Rank 3: 08/16/2016 09:57:09:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72806030 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0600s; samplesPerSecond = 4163.8
@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 09:57:10: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:10: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:10: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.46767057 * 250; EvalClassificationError = 0.18400000 * 250; time = 0.0401s; samplesPerSecond = 6237.1
 MPI Rank 3: 08/16/2016 09:57:10:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.39369585 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0482s; samplesPerSecond = 5187.6
 MPI Rank 3: 08/16/2016 09:57:11:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.32550048 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0802s; samplesPerSecond = 3116.5
@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 09:57:12: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:12: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:12: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12573638 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0440s; samplesPerSecond = 5676.9
 MPI Rank 3: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17793506 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0529s; samplesPerSecond = 4729.4
 MPI Rank 3: 08/16/2016 09:57:12:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14424050 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0229s; samplesPerSecond = 10939.5
@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 09:57:14: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:14: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:14: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12378899 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0886s; samplesPerSecond = 2823.1
 MPI Rank 3: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18072658 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0204s; samplesPerSecond = 12269.9
 MPI Rank 3: 08/16/2016 09:57:14:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14257652 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0418s; samplesPerSecond = 5987.9
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.gpu.txt
@ -478,7 +478,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8248.6
 MPI Rank 0: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.6
 MPI Rank 0: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9367.9
@ -524,7 +524,7 @@ MPI Rank 0: 08/16/2016 09:57:24: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10595.0
 MPI Rank 0: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10573.5
 MPI Rank 0: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10596.8
@ -570,7 +570,7 @@ MPI Rank 0: 08/16/2016 09:57:25: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.5
 MPI Rank 0: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10455.9
 MPI Rank 0: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10531.2
@ -616,7 +616,7 @@ MPI Rank 0: 08/16/2016 09:57:26: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10499.8
 MPI Rank 0: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10549.4
 MPI Rank 0: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10492.7
@ -1027,7 +1027,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8241.3
 MPI Rank 1: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.6
 MPI Rank 1: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9365.8
@ -1072,7 +1072,7 @@ MPI Rank 1: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10591.9
 MPI Rank 1: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10580.7
 MPI Rank 1: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10592.3
@ -1117,7 +1117,7 @@ MPI Rank 1: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10468.1
 MPI Rank 1: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
 MPI Rank 1: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10534.3
@ -1162,7 +1162,7 @@ MPI Rank 1: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10494.5
 MPI Rank 1: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10551.6
 MPI Rank 1: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10494.5
@ -1572,7 +1572,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8252.7
 MPI Rank 2: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9325.9
 MPI Rank 2: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9361.5
@ -1617,7 +1617,7 @@ MPI Rank 2: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10602.2
 MPI Rank 2: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10581.6
 MPI Rank 2: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
@ -1662,7 +1662,7 @@ MPI Rank 2: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10475.6
 MPI Rank 2: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10456.3
 MPI Rank 2: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0237s; samplesPerSecond = 10532.5
@ -1707,7 +1707,7 @@ MPI Rank 2: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10496.7
 MPI Rank 2: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10554.3
 MPI Rank 2: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10492.3
@ -2117,7 +2117,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:23: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:23: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0303s; samplesPerSecond = 8238.9
 MPI Rank 3: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0268s; samplesPerSecond = 9328.0
 MPI Rank 3: 08/16/2016 09:57:23:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0267s; samplesPerSecond = 9366.5
@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 09:57:24: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:24: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:24: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0236s; samplesPerSecond = 10595.0
 MPI Rank 3: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
 MPI Rank 3: 08/16/2016 09:57:24:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10591.9
@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 09:57:25: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:25: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:25: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10468.6
 MPI Rank 3: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0239s; samplesPerSecond = 10456.8
 MPI Rank 3: 08/16/2016 09:57:25:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10485.7
@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 09:57:26: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:26: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:26: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0238s; samplesPerSecond = 10489.2
 MPI Rank 3: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0237s; samplesPerSecond = 10552.5
 MPI Rank 3: 08/16/2016 09:57:26:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.cpu.txt
@ -474,7 +474,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0154s; samplesPerSecond = 16240.1
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0117s; samplesPerSecond = 21374.8
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21211.6
@ -520,7 +520,7 @@ MPI Rank 0: 08/16/2016 03:00:57: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21708.9
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21413.3
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21258.5
@ -566,7 +566,7 @@ MPI Rank 0: 08/16/2016 03:00:57: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0103s; samplesPerSecond = 24267.1
 MPI Rank 0: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23649.6
 MPI Rank 0: 08/16/2016 03:00:58:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0104s; samplesPerSecond = 24047.7
@ -612,7 +612,7 @@ MPI Rank 0: 08/16/2016 03:00:58: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23753.0
 MPI Rank 0: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0104s; samplesPerSecond = 24149.9
 MPI Rank 0: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24478.6
@ -1023,7 +1023,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 15976.5
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21485.0
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0119s; samplesPerSecond = 21093.5
@ -1068,7 +1068,7 @@ MPI Rank 1: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21699.5
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21373.0
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21139.9
@ -1113,7 +1113,7 @@ MPI Rank 1: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0104s; samplesPerSecond = 24087.1
 MPI Rank 1: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23672.0
 MPI Rank 1: 08/16/2016 03:00:58:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0105s; samplesPerSecond = 23852.7
@ -1158,7 +1158,7 @@ MPI Rank 1: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23764.3
 MPI Rank 1: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0103s; samplesPerSecond = 24224.8
 MPI Rank 1: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24457.1
@ -1568,7 +1568,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 15999.0
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0117s; samplesPerSecond = 21343.8
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21235.0
@ -1613,7 +1613,7 @@ MPI Rank 2: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21822.6
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21393.1
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0118s; samplesPerSecond = 21120.2
@ -1658,7 +1658,7 @@ MPI Rank 2: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0104s; samplesPerSecond = 23987.7
 MPI Rank 2: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23636.2
 MPI Rank 2: 08/16/2016 03:00:58:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0104s; samplesPerSecond = 24038.5
@ -1703,7 +1703,7 @@ MPI Rank 2: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0105s; samplesPerSecond = 23882.3
 MPI Rank 2: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0103s; samplesPerSecond = 24227.2
 MPI Rank 2: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24457.1
@ -2113,7 +2113,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:00:56: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:00:56: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086032 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0156s; samplesPerSecond = 16068.9
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052449 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0118s; samplesPerSecond = 21148.8
@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 03:00:57: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:00:57: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943594 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0115s; samplesPerSecond = 21716.5
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916586 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0117s; samplesPerSecond = 21389.5
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878117 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21303.8
@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 03:00:57: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:00:57: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:00:57: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580242 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0103s; samplesPerSecond = 24255.4
 MPI Rank 3: 08/16/2016 03:00:57:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0106s; samplesPerSecond = 23683.2
 MPI Rank 3: 08/16/2016 03:00:58:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339010 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0105s; samplesPerSecond = 23900.6
@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 03:00:58: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:00:58: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:00:58: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0104s; samplesPerSecond = 23971.6
 MPI Rank 3: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118390 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0104s; samplesPerSecond = 24096.4
 MPI Rank 3: 08/16/2016 03:00:58:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0102s; samplesPerSecond = 24471.4
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/baseline.windows.gpu.txt
@ -475,7 +475,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0776s; samplesPerSecond = 3220.8
 MPI Rank 0: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0484s; samplesPerSecond = 5166.8
 MPI Rank 0: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0483s; samplesPerSecond = 5176.1
@ -521,7 +521,7 @@ MPI Rank 0: 08/16/2016 03:01:15: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0460s; samplesPerSecond = 5438.9
 MPI Rank 0: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0451s; samplesPerSecond = 5547.3
 MPI Rank 0: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0449s; samplesPerSecond = 5566.2
@ -567,7 +567,7 @@ MPI Rank 0: 08/16/2016 03:01:17: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0445s; samplesPerSecond = 5620.9
 MPI Rank 0: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5668.3
 MPI Rank 0: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5630.6
@ -613,7 +613,7 @@ MPI Rank 0: 08/16/2016 03:01:18: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:18:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0437s; samplesPerSecond = 5715.9
 MPI Rank 0: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0443s; samplesPerSecond = 5641.8
 MPI Rank 0: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5627.5
@ -1025,7 +1025,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0775s; samplesPerSecond = 3223.9
 MPI Rank 1: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0484s; samplesPerSecond = 5167.1
 MPI Rank 1: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0485s; samplesPerSecond = 5157.7
@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0459s; samplesPerSecond = 5449.4
 MPI Rank 1: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0452s; samplesPerSecond = 5529.1
 MPI Rank 1: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0449s; samplesPerSecond = 5571.8
@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0444s; samplesPerSecond = 5633.8
 MPI Rank 1: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0443s; samplesPerSecond = 5646.9
 MPI Rank 1: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0442s; samplesPerSecond = 5653.7
@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:18:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0439s; samplesPerSecond = 5701.0
 MPI Rank 1: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5671.1
 MPI Rank 1: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0444s; samplesPerSecond = 5624.4
@ -1571,7 +1571,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0775s; samplesPerSecond = 3225.3
 MPI Rank 2: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0485s; samplesPerSecond = 5150.4
 MPI Rank 2: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0484s; samplesPerSecond = 5163.0
@ -1616,7 +1616,7 @@ MPI Rank 2: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0458s; samplesPerSecond = 5455.7
 MPI Rank 2: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0453s; samplesPerSecond = 5514.5
 MPI Rank 2: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0447s; samplesPerSecond = 5587.2
@ -1661,7 +1661,7 @@ MPI Rank 2: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0445s; samplesPerSecond = 5622.7
 MPI Rank 2: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0441s; samplesPerSecond = 5672.9
 MPI Rank 2: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0445s; samplesPerSecond = 5622.4
@ -1706,7 +1706,7 @@ MPI Rank 2: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:18:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0437s; samplesPerSecond = 5716.1
 MPI Rank 2: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.2
 MPI Rank 2: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0443s; samplesPerSecond = 5646.7
@ -2117,7 +2117,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:13: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:13: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69934401 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3252.2
 MPI Rank 3: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71365166 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0483s; samplesPerSecond = 5174.1
 MPI Rank 3: 08/16/2016 03:01:13:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72803064 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0485s; samplesPerSecond = 5155.8
@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 03:01:15: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:15: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:15: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.27493252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0459s; samplesPerSecond = 5441.9
 MPI Rank 3: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.24181296 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0447s; samplesPerSecond = 5588.3
 MPI Rank 3: 08/16/2016 03:01:15:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.20316066 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0447s; samplesPerSecond = 5598.4
@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 03:01:17: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:17: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:17: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12515571 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0439s; samplesPerSecond = 5698.5
 MPI Rank 3: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17892936 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0447s; samplesPerSecond = 5587.8
 MPI Rank 3: 08/16/2016 03:01:17:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14366253 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0438s; samplesPerSecond = 5712.8
@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 03:01:18: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:18: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:18: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:18:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12380915 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0433s; samplesPerSecond = 5776.6
 MPI Rank 3: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18110099 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4
 MPI Rank 3: 08/16/2016 03:01:19:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14240048 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0450s; samplesPerSecond = 5553.0
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/DoublePrecision/testcases.yml
@ -35,6 +35,6 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 4
-      - NumGradientBits = 64
+      - myRank = {{integer}}
+      - numNodes = 4
+      - numGradientBits = 64
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.cpu.txt
@ -477,7 +477,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1682s; samplesPerSecond = 1485.9
 MPI Rank 0: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5569.9
 MPI Rank 0: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4
@ -523,7 +523,7 @@ MPI Rank 0: 08/16/2016 09:57:33: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.3
 MPI Rank 0: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.2
 MPI Rank 0: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.0
@ -569,7 +569,7 @@ MPI Rank 0: 08/16/2016 09:57:35: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.4
 MPI Rank 0: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.1
 MPI Rank 0: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4860.2
@ -615,7 +615,7 @@ MPI Rank 0: 08/16/2016 09:57:37: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5476.1
 MPI Rank 0: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5374.8
 MPI Rank 0: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9
@ -1025,7 +1025,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1683s; samplesPerSecond = 1485.7
 MPI Rank 1: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5571.0
 MPI Rank 1: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.4
@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3823.9
 MPI Rank 1: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.5
 MPI Rank 1: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.1
@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.0
 MPI Rank 1: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5634.6
 MPI Rank 1: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4859.8
@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5474.7
 MPI Rank 1: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5375.0
 MPI Rank 1: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.7
@ -1569,7 +1569,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1682s; samplesPerSecond = 1486.1
 MPI Rank 2: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5570.0
 MPI Rank 2: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5658.7
@ -1614,7 +1614,7 @@ MPI Rank 2: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.5
 MPI Rank 2: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
 MPI Rank 2: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.3
@ -1659,7 +1659,7 @@ MPI Rank 2: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.4
 MPI Rank 2: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5634.9
 MPI Rank 2: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0515s; samplesPerSecond = 4851.7
@ -1704,7 +1704,7 @@ MPI Rank 2: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5475.9
 MPI Rank 2: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5375.1
 MPI Rank 2: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9
@ -2113,7 +2113,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:31: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:31: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69922868 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1683s; samplesPerSecond = 1485.8
 MPI Rank 3: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71203584 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0449s; samplesPerSecond = 5571.0
 MPI Rank 3: 08/16/2016 09:57:31:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72631286 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0442s; samplesPerSecond = 5659.2
@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 09:57:33: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:33: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:33: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.31415305 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0654s; samplesPerSecond = 3824.1
 MPI Rank 3: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.26920577 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0495s; samplesPerSecond = 5055.3
 MPI Rank 3: 08/16/2016 09:57:33:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.22349829 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0735s; samplesPerSecond = 3401.1
@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 09:57:35: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:35: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:35: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12535183 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0616s; samplesPerSecond = 4060.2
 MPI Rank 3: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17861531 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0444s; samplesPerSecond = 5635.1
 MPI Rank 3: 08/16/2016 09:57:35:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14359719 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0514s; samplesPerSecond = 4859.2
@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 09:57:37: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:37: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:37: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12387404 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0457s; samplesPerSecond = 5474.5
 MPI Rank 3: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18078590 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0465s; samplesPerSecond = 5374.7
 MPI Rank 3: 08/16/2016 09:57:37:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14225625 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0317s; samplesPerSecond = 7886.9
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.gpu.txt
@ -478,7 +478,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11152.3
 MPI Rank 0: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13162.7
 MPI Rank 0: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13013.4
@ -524,7 +524,7 @@ MPI Rank 0: 08/16/2016 09:57:47: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12582.4
 MPI Rank 0: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13160.0
 MPI Rank 0: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13169.7
@ -570,7 +570,7 @@ MPI Rank 0: 08/16/2016 09:57:48: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 13007.3
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13097.2
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13152.4
@ -616,7 +616,7 @@ MPI Rank 0: 08/16/2016 09:57:48: SGD: Saving checkpoint model '/tmp/cntk-test-20
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13088.3
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13015.4
 MPI Rank 0: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13072.6
@ -1027,7 +1027,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11167.7
 MPI Rank 1: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13152.4
 MPI Rank 1: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3
@ -1072,7 +1072,7 @@ MPI Rank 1: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12576.7
 MPI Rank 1: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13170.4
 MPI Rank 1: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13166.9
@ -1117,7 +1117,7 @@ MPI Rank 1: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 12997.8
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13088.3
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13155.8
@ -1162,7 +1162,7 @@ MPI Rank 1: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13087.6
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13015.4
 MPI Rank 1: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13065.7
@ -1572,7 +1572,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11148.3
 MPI Rank 2: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13153.7
 MPI Rank 2: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3
@ -1617,7 +1617,7 @@ MPI Rank 2: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12587.5
 MPI Rank 2: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13157.2
 MPI Rank 2: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13166.2
@ -1662,7 +1662,7 @@ MPI Rank 2: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 13010.7
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13091.1
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13153.7
@ -1707,7 +1707,7 @@ MPI Rank 2: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13085.6
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13024.9
 MPI Rank 2: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13062.3
@ -2117,7 +2117,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:46: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:46: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0224s; samplesPerSecond = 11158.7
 MPI Rank 3: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0190s; samplesPerSecond = 13155.1
 MPI Rank 3: 08/16/2016 09:57:46:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0192s; samplesPerSecond = 13011.3
@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 09:57:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:47: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0199s; samplesPerSecond = 12581.8
 MPI Rank 3: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0190s; samplesPerSecond = 13160.0
 MPI Rank 3: 08/16/2016 09:57:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0190s; samplesPerSecond = 13168.3
@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 09:57:48: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:48: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0192s; samplesPerSecond = 12998.5
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0191s; samplesPerSecond = 13089.0
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0190s; samplesPerSecond = 13149.6
@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 09:57:48: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0191s; samplesPerSecond = 13078.7
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0192s; samplesPerSecond = 13024.2
 MPI Rank 3: 08/16/2016 09:57:48:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0191s; samplesPerSecond = 13061.7
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.cpu.txt
@ -474,7 +474,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0147s; samplesPerSecond = 16956.0
 MPI Rank 0: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21626.3
 MPI Rank 0: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0108s; samplesPerSecond = 23062.7
@ -520,7 +520,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0107s; samplesPerSecond = 23286.1
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23126.7
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0104s; samplesPerSecond = 23967.0
@ -566,7 +566,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25858.5
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25403.9
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0097s; samplesPerSecond = 25677.9
@ -612,7 +612,7 @@ MPI Rank 0: 08/16/2016 03:01:30: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 24985.0
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25646.3
 MPI Rank 0: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0095s; samplesPerSecond = 26246.7
@ -1023,7 +1023,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0144s; samplesPerSecond = 17364.7
 MPI Rank 1: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21602.0
 MPI Rank 1: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0109s; samplesPerSecond = 22994.8
@ -1068,7 +1068,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23249.3
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0105s; samplesPerSecond = 23859.5
@ -1113,7 +1113,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0096s; samplesPerSecond = 25930.9
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0099s; samplesPerSecond = 25148.4
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0098s; samplesPerSecond = 25544.1
@ -1158,7 +1158,7 @@ MPI Rank 1: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 25108.0
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25510.2
 MPI Rank 1: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26126.0
@ -1568,7 +1568,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0147s; samplesPerSecond = 16960.7
 MPI Rank 2: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21613.2
 MPI Rank 2: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0108s; samplesPerSecond = 23058.5
@ -1613,7 +1613,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23245.0
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0108s; samplesPerSecond = 23075.5
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0105s; samplesPerSecond = 23818.6
@ -1658,7 +1658,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25823.8
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0102s; samplesPerSecond = 24616.0
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0097s; samplesPerSecond = 25654.2
@ -1703,7 +1703,7 @@ MPI Rank 2: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 25110.5
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0098s; samplesPerSecond = 25434.9
 MPI Rank 2: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26087.9
@ -2113,7 +2113,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:29: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:29: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70086033 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0146s; samplesPerSecond = 17070.7
 MPI Rank 3: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71633890 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0116s; samplesPerSecond = 21603.9
 MPI Rank 3: 08/16/2016 03:01:29:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.73052450 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0109s; samplesPerSecond = 22958.9
@ -2158,7 +2158,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.34943644 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0107s; samplesPerSecond = 23292.6
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.29916625 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0109s; samplesPerSecond = 23033.0
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.24878148 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0104s; samplesPerSecond = 23992.3
@ -2203,7 +2203,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12580243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0097s; samplesPerSecond = 25767.9
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17848323 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0099s; samplesPerSecond = 25255.1
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14339012 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0098s; samplesPerSecond = 25578.1
@ -2248,7 +2248,7 @@ MPI Rank 3: 08/16/2016 03:01:30: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:30: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:30: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12368176 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0100s; samplesPerSecond = 24982.5
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18118389 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0097s; samplesPerSecond = 25683.2
 MPI Rank 3: 08/16/2016 03:01:30:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14161964 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0096s; samplesPerSecond = 26158.8
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/baseline.windows.gpu.txt
@ -475,7 +475,7 @@ MPI Rank 0:
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0501s; samplesPerSecond = 4991.1
 MPI Rank 0: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0441s; samplesPerSecond = 5669.3
 MPI Rank 0: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5717.2
@ -521,7 +521,7 @@ MPI Rank 0: 08/16/2016 03:01:47: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0418s; samplesPerSecond = 5977.9
 MPI Rank 0: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0418s; samplesPerSecond = 5974.1
 MPI Rank 0: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6034.7
@ -567,7 +567,7 @@ MPI Rank 0: 08/16/2016 03:01:49: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0408s; samplesPerSecond = 6125.3
 MPI Rank 0: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6134.7
 MPI Rank 0: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6134.1
@ -613,7 +613,7 @@ MPI Rank 0: 08/16/2016 03:01:50: SGD: Saving checkpoint model 'C:\Users\svcphil\
 MPI Rank 0: 
 MPI Rank 0: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0415s; samplesPerSecond = 6017.7
 MPI Rank 0: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0411s; samplesPerSecond = 6089.4
 MPI Rank 0: 08/16/2016 03:01:51:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0411s; samplesPerSecond = 6081.8
@ -1025,7 +1025,7 @@ MPI Rank 1:
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0500s; samplesPerSecond = 4999.0
 MPI Rank 1: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0441s; samplesPerSecond = 5668.0
 MPI Rank 1: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0438s; samplesPerSecond = 5714.3
@ -1070,7 +1070,7 @@ MPI Rank 1: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0419s; samplesPerSecond = 5968.3
 MPI Rank 1: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0417s; samplesPerSecond = 5993.5
 MPI Rank 1: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6033.4
@ -1115,7 +1115,7 @@ MPI Rank 1: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0409s; samplesPerSecond = 6107.4
 MPI Rank 1: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0407s; samplesPerSecond = 6135.7
 MPI Rank 1: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6131.4
@ -1160,7 +1160,7 @@ MPI Rank 1: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 1: 
 MPI Rank 1: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0417s; samplesPerSecond = 5996.9
 MPI Rank 1: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6099.5
 MPI Rank 1: 08/16/2016 03:01:51:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0409s; samplesPerSecond = 6108.3
@ -1571,7 +1571,7 @@ MPI Rank 2:
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0500s; samplesPerSecond = 5001.6
 MPI Rank 2: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0442s; samplesPerSecond = 5651.4
 MPI Rank 2: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5716.9
@ -1616,7 +1616,7 @@ MPI Rank 2: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0418s; samplesPerSecond = 5980.7
 MPI Rank 2: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0419s; samplesPerSecond = 5970.4
 MPI Rank 2: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0414s; samplesPerSecond = 6032.8
@ -1661,7 +1661,7 @@ MPI Rank 2: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0410s; samplesPerSecond = 6099.9
 MPI Rank 2: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6128.8
 MPI Rank 2: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6131.7
@ -1706,7 +1706,7 @@ MPI Rank 2: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 2: 
 MPI Rank 2: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0416s; samplesPerSecond = 6010.9
 MPI Rank 2: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6100.5
 MPI Rank 2: 08/16/2016 03:01:51:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0411s; samplesPerSecond = 6076.1
@ -2117,7 +2117,7 @@ MPI Rank 3:
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:45: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:45: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.70007977 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0501s; samplesPerSecond = 4987.8
 MPI Rank 3: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.71514542 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0440s; samplesPerSecond = 5676.8
 MPI Rank 3: 08/16/2016 03:01:45:  Epoch[ 1 of 4]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.72945594 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0437s; samplesPerSecond = 5717.7
@ -2162,7 +2162,7 @@ MPI Rank 3: 08/16/2016 03:01:47: Finished Epoch[ 1 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:47: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:47: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.50774630 * 250; EvalClassificationError = 0.24000000 * 250; time = 0.0419s; samplesPerSecond = 5970.3
 MPI Rank 3: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.43388931 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0418s; samplesPerSecond = 5987.2
 MPI Rank 3: 08/16/2016 03:01:47:  Epoch[ 2 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.36674875 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0415s; samplesPerSecond = 6031.1
@ -2207,7 +2207,7 @@ MPI Rank 3: 08/16/2016 03:01:49: Finished Epoch[ 2 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:49: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:49: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12590085 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0409s; samplesPerSecond = 6107.7
 MPI Rank 3: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.17780229 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0408s; samplesPerSecond = 6130.2
 MPI Rank 3: 08/16/2016 03:01:49:  Epoch[ 3 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14417637 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0408s; samplesPerSecond = 6130.9
@ -2252,7 +2252,7 @@ MPI Rank 3: 08/16/2016 03:01:50: Finished Epoch[ 3 of 4]: [Training] CrossEntrop
 MPI Rank 3: 
 MPI Rank 3: 08/16/2016 03:01:50: Starting Epoch 4: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
 MPI Rank 3: 
-MPI Rank 3: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (MyRank = 3, NumNodes = 4, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 3: 08/16/2016 03:01:50: Starting minibatch loop, DataParallelSGD training (myRank = 3, numNodes = 4, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 3: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.12371233 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0417s; samplesPerSecond = 6001.1
 MPI Rank 3: 08/16/2016 03:01:50:  Epoch[ 4 of 4]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.18070513 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0410s; samplesPerSecond = 6097.1
 MPI Rank 3: 08/16/2016 03:01:51:  Epoch[ 4 of 4]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.14239730 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0410s; samplesPerSecond = 6094.4
--- a/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml
+++ b/Tests/EndToEndTests/ParallelTraining/NoQuantization/SinglePrecision/testcases.yml
@ -35,6 +35,6 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 4
-      - NumGradientBits = 32
+      - myRank = {{integer}}
+      - numNodes = 4
+      - numGradientBits = 32
--- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.cpu.txt
@ -549,7 +549,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:57:34: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:35:  Epoch[ 2 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.95183370 * 2560; err = 0.88007813 * 2560; time = 0.9133s; samplesPerSecond = 2803.0
 MPI Rank 0: 08/16/2016 09:57:36:  Epoch[ 2 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.87879531 * 2560; err = 0.87578125 * 2560; time = 0.8783s; samplesPerSecond = 2914.9
 MPI Rank 0: 08/16/2016 09:57:37:  Epoch[ 2 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.85396065 * 2560; err = 0.87578125 * 2560; time = 0.8777s; samplesPerSecond = 2916.6
@ -565,7 +565,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:57:42: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:42:  Epoch[ 3 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.37945068 * 2560; err = 0.79882812 * 2560; time = 0.8417s; samplesPerSecond = 3041.5
 MPI Rank 0: 08/16/2016 09:57:43:  Epoch[ 3 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.29694288 * 2560; err = 0.79570312 * 2560; time = 0.8678s; samplesPerSecond = 2950.0
 MPI Rank 0: 08/16/2016 09:57:44:  Epoch[ 3 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.30035303 * 2560; err = 0.80468750 * 2560; time = 0.8295s; samplesPerSecond = 3086.3
@ -580,7 +580,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:49:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8373s; samplesPerSecond = 3057.6
 MPI Rank 0: 08/16/2016 09:57:50:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8488s; samplesPerSecond = 3016.1
 MPI Rank 0: 08/16/2016 09:57:51:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8374s; samplesPerSecond = 3057.2
@ -596,7 +596,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:57:55: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:56:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.8067s; samplesPerSecond = 3173.4
 MPI Rank 0: 08/16/2016 09:57:57:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8110s; samplesPerSecond = 3156.7
 MPI Rank 0: 08/16/2016 09:57:57:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7916s; samplesPerSecond = 3234.0
@ -1101,7 +1101,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:57:34: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:34: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:35:  Epoch[ 2 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.95183370 * 2560; err = 0.88007813 * 2560; time = 0.9135s; samplesPerSecond = 2802.3
 MPI Rank 1: 08/16/2016 09:57:36:  Epoch[ 2 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.87879531 * 2560; err = 0.87578125 * 2560; time = 0.8781s; samplesPerSecond = 2915.3
 MPI Rank 1: 08/16/2016 09:57:37:  Epoch[ 2 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.85396065 * 2560; err = 0.87578125 * 2560; time = 0.8778s; samplesPerSecond = 2916.4
@ -1116,7 +1116,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:57:42: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:42: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:42:  Epoch[ 3 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.37945068 * 2560; err = 0.79882812 * 2560; time = 0.8417s; samplesPerSecond = 3041.5
 MPI Rank 1: 08/16/2016 09:57:43:  Epoch[ 3 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.29694288 * 2560; err = 0.79570312 * 2560; time = 0.8680s; samplesPerSecond = 2949.2
 MPI Rank 1: 08/16/2016 09:57:44:  Epoch[ 3 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.30035303 * 2560; err = 0.80468750 * 2560; time = 0.8294s; samplesPerSecond = 3086.6
@ -1130,7 +1130,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:57:48: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:49:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8374s; samplesPerSecond = 3057.1
 MPI Rank 1: 08/16/2016 09:57:50:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8488s; samplesPerSecond = 3016.0
 MPI Rank 1: 08/16/2016 09:57:51:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8375s; samplesPerSecond = 3056.6
@ -1145,7 +1145,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:57:55: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:56:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.8070s; samplesPerSecond = 3172.1
 MPI Rank 1: 08/16/2016 09:57:57:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8108s; samplesPerSecond = 3157.6
 MPI Rank 1: 08/16/2016 09:57:57:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7916s; samplesPerSecond = 3234.0
@ -1667,7 +1667,7 @@ MPI Rank 0: 08/16/2016 09:58:04: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:58:05:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.8707s; samplesPerSecond = 2940.0
 MPI Rank 0: 08/16/2016 09:58:06:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8271s; samplesPerSecond = 3095.2
 MPI Rank 0: 08/16/2016 09:58:07:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8286s; samplesPerSecond = 3089.5
@ -1683,7 +1683,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:11: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:58:12:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.7991s; samplesPerSecond = 3203.6
 MPI Rank 0: 08/16/2016 09:58:12:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8107s; samplesPerSecond = 3157.8
 MPI Rank 0: 08/16/2016 09:58:13:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7913s; samplesPerSecond = 3235.3
@ -2147,7 +2147,7 @@ MPI Rank 1: 08/16/2016 09:58:04: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:04: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:58:05:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.01965680 * 2560; err = 0.74375000 * 2560; time = 0.9453s; samplesPerSecond = 2708.2
 MPI Rank 1: 08/16/2016 09:58:06:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.94570588 * 2560; err = 0.72031250 * 2560; time = 0.8269s; samplesPerSecond = 3095.9
 MPI Rank 1: 08/16/2016 09:58:07:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.92723926 * 2560; err = 0.71875000 * 2560; time = 0.8288s; samplesPerSecond = 3088.9
@ -2162,7 +2162,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:11: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:11: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:58:12:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62996846 * 2560; err = 0.65039062 * 2560; time = 0.7994s; samplesPerSecond = 3202.2
 MPI Rank 1: 08/16/2016 09:58:12:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.62577187 * 2560; err = 0.66914063 * 2560; time = 0.8101s; samplesPerSecond = 3160.0
 MPI Rank 1: 08/16/2016 09:58:13:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50552697 * 2560; err = 0.64570313 * 2560; time = 0.7917s; samplesPerSecond = 3233.4
--- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.gpu.txt
@ -553,7 +553,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:24: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 4
 MPI Rank 0: (GPU): creating curand object with seed 5
 MPI Rank 0: (GPU): creating curand object with seed 6
@ -573,7 +573,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:24: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 8
 MPI Rank 0: (GPU): creating curand object with seed 9
 MPI Rank 0: (GPU): creating curand object with seed 10
@ -592,7 +592,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:25: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 12
 MPI Rank 0: (GPU): creating curand object with seed 13
 MPI Rank 0: (GPU): creating curand object with seed 14
@ -612,7 +612,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:26: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0643s; samplesPerSecond = 39801.6
 MPI Rank 0: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0622s; samplesPerSecond = 41125.2
 MPI Rank 0: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41475.0
@ -1121,7 +1121,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:24: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 24
 MPI Rank 1: (GPU): creating curand object with seed 25
 MPI Rank 1: (GPU): creating curand object with seed 26
@ -1140,7 +1140,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:24: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 28
 MPI Rank 1: (GPU): creating curand object with seed 29
 MPI Rank 1: (GPU): creating curand object with seed 30
@ -1158,7 +1158,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:25: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 32
 MPI Rank 1: (GPU): creating curand object with seed 33
 MPI Rank 1: (GPU): creating curand object with seed 34
@ -1177,7 +1177,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:26: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0646s; samplesPerSecond = 39645.1
 MPI Rank 1: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0624s; samplesPerSecond = 41004.0
 MPI Rank 1: 08/16/2016 09:58:26:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41497.1
@ -1699,7 +1699,7 @@ MPI Rank 0: 08/16/2016 09:58:31: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 12
 MPI Rank 0: (GPU): creating curand object with seed 13
 MPI Rank 0: (GPU): creating curand object with seed 14
@ -1719,7 +1719,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:32: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0631s; samplesPerSecond = 40566.7
 MPI Rank 0: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0615s; samplesPerSecond = 41624.0
 MPI Rank 0: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0617s; samplesPerSecond = 41518.7
@ -2183,7 +2183,7 @@ MPI Rank 1: 08/16/2016 09:58:31: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 32
 MPI Rank 1: (GPU): creating curand object with seed 33
 MPI Rank 1: (GPU): creating curand object with seed 34
@ -2202,7 +2202,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:32: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.62055426 * 2560; err = 0.64882812 * 2560; time = 0.0633s; samplesPerSecond = 40438.5
 MPI Rank 1: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.61482008 * 2560; err = 0.66875000 * 2560; time = 0.0615s; samplesPerSecond = 41657.2
 MPI Rank 1: 08/16/2016 09:58:32:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.50300923 * 2560; err = 0.64531250 * 2560; time = 0.0618s; samplesPerSecond = 41426.6
--- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.cpu.txt
@ -545,7 +545,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:27: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:28:  Epoch[ 2 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.94527147 * 2560; err = 0.88906250 * 2560; time = 0.9421s; samplesPerSecond = 2717.4
 MPI Rank 0: 08/16/2016 03:02:29:  Epoch[ 2 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.86277831 * 2560; err = 0.87773437 * 2560; time = 0.9084s; samplesPerSecond = 2818.2
 MPI Rank 0: 08/16/2016 03:02:30:  Epoch[ 2 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.85494012 * 2560; err = 0.89140625 * 2560; time = 0.9825s; samplesPerSecond = 2605.5
@ -561,7 +561,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:35: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:36:  Epoch[ 3 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.34316119 * 2560; err = 0.79257813 * 2560; time = 1.1723s; samplesPerSecond = 2183.7
 MPI Rank 0: 08/16/2016 03:02:37:  Epoch[ 3 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.24841775 * 2560; err = 0.79257813 * 2560; time = 0.9683s; samplesPerSecond = 2643.8
 MPI Rank 0: 08/16/2016 03:02:38:  Epoch[ 3 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.25792707 * 2560; err = 0.78632813 * 2560; time = 0.8789s; samplesPerSecond = 2912.6
@ -576,7 +576,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:42: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:43:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.1723s; samplesPerSecond = 2183.8
 MPI Rank 0: 08/16/2016 03:02:44:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0254s; samplesPerSecond = 2496.6
 MPI Rank 0: 08/16/2016 03:02:45:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.6891s; samplesPerSecond = 3714.7
@ -592,7 +592,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:50: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:51:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.8140s; samplesPerSecond = 3144.9
 MPI Rank 0: 08/16/2016 03:02:52:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.7814s; samplesPerSecond = 3276.0
 MPI Rank 0: 08/16/2016 03:02:53:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0202s; samplesPerSecond = 2509.4
@ -1095,7 +1095,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:27: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:27: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:28:  Epoch[ 2 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.94527147 * 2560; err = 0.88906250 * 2560; time = 0.9409s; samplesPerSecond = 2720.7
 MPI Rank 1: 08/16/2016 03:02:29:  Epoch[ 2 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.86277831 * 2560; err = 0.87773437 * 2560; time = 0.9084s; samplesPerSecond = 2818.2
 MPI Rank 1: 08/16/2016 03:02:30:  Epoch[ 2 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.85494012 * 2560; err = 0.89140625 * 2560; time = 0.9825s; samplesPerSecond = 2605.7
@ -1110,7 +1110,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:35: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:35: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:36:  Epoch[ 3 of 5]-Minibatch[   1-  10, 12.50%]: ce = 3.34316119 * 2560; err = 0.79257813 * 2560; time = 1.1725s; samplesPerSecond = 2183.3
 MPI Rank 1: 08/16/2016 03:02:37:  Epoch[ 3 of 5]-Minibatch[  11-  20, 25.00%]: ce = 3.24841775 * 2560; err = 0.79257813 * 2560; time = 0.9674s; samplesPerSecond = 2646.2
 MPI Rank 1: 08/16/2016 03:02:38:  Epoch[ 3 of 5]-Minibatch[  21-  30, 37.50%]: ce = 3.25792707 * 2560; err = 0.78632813 * 2560; time = 0.8798s; samplesPerSecond = 2909.7
@ -1124,7 +1124,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:42: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:42: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:43:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.1737s; samplesPerSecond = 2181.1
 MPI Rank 1: 08/16/2016 03:02:44:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0242s; samplesPerSecond = 2499.4
 MPI Rank 1: 08/16/2016 03:02:45:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.6909s; samplesPerSecond = 3705.2
@ -1139,7 +1139,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:50: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:50: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:51:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.8153s; samplesPerSecond = 3140.0
 MPI Rank 1: 08/16/2016 03:02:52:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.7794s; samplesPerSecond = 3284.6
 MPI Rank 1: 08/16/2016 03:02:53:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0223s; samplesPerSecond = 2504.2
@ -1657,7 +1657,7 @@ MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:02:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 0.9824s; samplesPerSecond = 2605.9
 MPI Rank 0: 08/16/2016 03:03:03:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0388s; samplesPerSecond = 2464.3
 MPI Rank 0: 08/16/2016 03:03:04:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.9608s; samplesPerSecond = 2664.4
@ -1673,7 +1673,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:09: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:10:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.9136s; samplesPerSecond = 2802.2
 MPI Rank 0: 08/16/2016 03:03:11:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.9281s; samplesPerSecond = 2758.3
 MPI Rank 0: 08/16/2016 03:03:12:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0057s; samplesPerSecond = 2545.5
@ -2135,7 +2135,7 @@ MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:02:  Epoch[ 4 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.98185381 * 2560; err = 0.73554688 * 2560; time = 1.0301s; samplesPerSecond = 2485.2
 MPI Rank 1: 08/16/2016 03:03:03:  Epoch[ 4 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.90257182 * 2560; err = 0.70781250 * 2560; time = 1.0388s; samplesPerSecond = 2464.3
 MPI Rank 1: 08/16/2016 03:03:04:  Epoch[ 4 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.87658405 * 2560; err = 0.70937500 * 2560; time = 0.9608s; samplesPerSecond = 2664.5
@ -2150,7 +2150,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:09: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:10:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.58512907 * 2560; err = 0.64375000 * 2560; time = 0.9148s; samplesPerSecond = 2798.3
 MPI Rank 1: 08/16/2016 03:03:11:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.58142844 * 2560; err = 0.66601563 * 2560; time = 0.9282s; samplesPerSecond = 2757.9
 MPI Rank 1: 08/16/2016 03:03:12:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.46930316 * 2560; err = 0.64570313 * 2560; time = 1.0036s; samplesPerSecond = 2550.8
--- a/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Dropout/baseline.windows.gpu.txt
@ -549,7 +549,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:24: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 4
 MPI Rank 0: (GPU): creating curand object with seed 5
 MPI Rank 0: (GPU): creating curand object with seed 6
@ -569,7 +569,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:25: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 8
 MPI Rank 0: (GPU): creating curand object with seed 9
 MPI Rank 0: (GPU): creating curand object with seed 10
@ -588,7 +588,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:26: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 12
 MPI Rank 0: (GPU): creating curand object with seed 13
 MPI Rank 0: (GPU): creating curand object with seed 14
@ -608,7 +608,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:27: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1077s; samplesPerSecond = 23779.7
 MPI Rank 0: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1058s; samplesPerSecond = 24206.9
 MPI Rank 0: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1088s; samplesPerSecond = 23534.4
@ -1115,7 +1115,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:24: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:24: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 24
 MPI Rank 1: (GPU): creating curand object with seed 25
 MPI Rank 1: (GPU): creating curand object with seed 26
@ -1134,7 +1134,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:25: Starting Epoch 3: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 28
 MPI Rank 1: (GPU): creating curand object with seed 29
 MPI Rank 1: (GPU): creating curand object with seed 30
@ -1152,7 +1152,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:26: Starting Epoch 4: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:26: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 32
 MPI Rank 1: (GPU): creating curand object with seed 33
 MPI Rank 1: (GPU): creating curand object with seed 34
@ -1171,7 +1171,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:27: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:27: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1071s; samplesPerSecond = 23903.1
 MPI Rank 1: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1057s; samplesPerSecond = 24212.4
 MPI Rank 1: 08/16/2016 03:03:27:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1088s; samplesPerSecond = 23531.1
@ -1689,7 +1689,7 @@ MPI Rank 0: 08/16/2016 03:03:33: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: (GPU): creating curand object with seed 12
 MPI Rank 0: (GPU): creating curand object with seed 13
 MPI Rank 0: (GPU): creating curand object with seed 14
@ -1709,7 +1709,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:34: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 0: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1067s; samplesPerSecond = 23987.6
 MPI Rank 0: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1040s; samplesPerSecond = 24610.7
 MPI Rank 0: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1042s; samplesPerSecond = 24574.7
@ -2171,7 +2171,7 @@ MPI Rank 1: 08/16/2016 03:03:33: Starting Epoch 4: learning rate per sample = 0.
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: (GPU): creating curand object with seed 32
 MPI Rank 1: (GPU): creating curand object with seed 33
 MPI Rank 1: (GPU): creating curand object with seed 34
@ -2190,7 +2190,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:34: Starting Epoch 5: learning rate per sample = 0.001953  effective momentum = 0.900000  momentum as time constant = 2429.8 samples
 MPI Rank 1: minibatchiterator: epoch 4: frames [81920..102400] (first utterance at frame 81920), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:34: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[   1-  10, 12.50%]: ce = 2.49174185 * 2560; err = 0.61718750 * 2560; time = 0.1072s; samplesPerSecond = 23882.2
 MPI Rank 1: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[  11-  20, 25.00%]: ce = 2.49057629 * 2560; err = 0.64296875 * 2560; time = 0.1041s; samplesPerSecond = 24588.9
 MPI Rank 1: 08/16/2016 03:03:34:  Epoch[ 5 of 5]-Minibatch[  21-  30, 37.50%]: ce = 2.37584589 * 2560; err = 0.62226563 * 2560; time = 0.1041s; samplesPerSecond = 24588.0
--- a/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/Dropout/testcases.yml
@ -34,7 +34,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 2
-      - NumGradientBits = 32
+      - myRank = {{integer}}
+      - numNodes = 2
+      - numGradientBits = 32
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.cpu.txt
@ -616,7 +616,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5690s; samplesPerSecond = 4499.5
 MPI Rank 0: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7981s; samplesPerSecond = 3207.4
 MPI Rank 0: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4440s; samplesPerSecond = 5765.9
@ -631,7 +631,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3527s; samplesPerSecond = 7570.1
 MPI Rank 0: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6866s; samplesPerSecond = 6071.4
 MPI Rank 0: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.0565s
@ -1169,7 +1169,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5643s; samplesPerSecond = 4537.0
 MPI Rank 1: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7961s; samplesPerSecond = 3215.7
 MPI Rank 1: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4504s; samplesPerSecond = 5684.3
@ -1183,7 +1183,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3515s; samplesPerSecond = 7576.5
 MPI Rank 1: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6863s; samplesPerSecond = 6072.6
 MPI Rank 1: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05591s
@ -1720,7 +1720,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5710s; samplesPerSecond = 4483.5
 MPI Rank 2: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7894s; samplesPerSecond = 3242.9
 MPI Rank 2: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4520s; samplesPerSecond = 5664.3
@ -1734,7 +1734,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3532s; samplesPerSecond = 7567.4
 MPI Rank 2: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6847s; samplesPerSecond = 6078.4
 MPI Rank 2: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05602s
--- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.gpu.txt
@ -617,7 +617,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1484s; samplesPerSecond = 17245.9
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17868.6
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18039.2
@ -632,7 +632,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2875s; samplesPerSecond = 35614.4
 MPI Rank 0: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36224.7
 MPI Rank 0: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.577049s
@ -1171,7 +1171,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1487s; samplesPerSecond = 17211.0
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17870.0
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18038.8
@ -1185,7 +1185,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2878s; samplesPerSecond = 35576.6
 MPI Rank 1: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36218.6
 MPI Rank 1: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.576897s
@ -1723,7 +1723,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1485s; samplesPerSecond = 17237.9
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1432s; samplesPerSecond = 17878.8
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18040.0
@ -1737,7 +1737,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2880s; samplesPerSecond = 35554.3
 MPI Rank 2: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2824s; samplesPerSecond = 36264.2
 MPI Rank 2: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.57713s
--- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt
@ -614,7 +614,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3458s; samplesPerSecond = 7402.6
 MPI Rank 0: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3452s; samplesPerSecond = 7416.2
 MPI Rank 0: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.5
@ -629,7 +629,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.6987s; samplesPerSecond = 14654.8
 MPI Rank 0: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6563s; samplesPerSecond = 15601.8
 MPI Rank 0: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38257s
@ -1168,7 +1168,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3464s; samplesPerSecond = 7390.3
 MPI Rank 1: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.0
 MPI Rank 1: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3528s; samplesPerSecond = 7255.6
@ -1182,7 +1182,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7010s; samplesPerSecond = 14607.7
 MPI Rank 1: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6562s; samplesPerSecond = 15604.6
 MPI Rank 1: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38447s
@ -1720,7 +1720,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3462s; samplesPerSecond = 7394.9
 MPI Rank 2: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.9
 MPI Rank 2: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.6
@ -1734,7 +1734,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7014s; samplesPerSecond = 14599.6
 MPI Rank 2: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6564s; samplesPerSecond = 15599.2
 MPI Rank 2: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38407s
--- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt
@ -615,7 +615,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2084s; samplesPerSecond = 12286.1
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1804s; samplesPerSecond = 14191.5
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1704s; samplesPerSecond = 15022.6
@ -630,7 +630,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3564s; samplesPerSecond = 28732.2
 MPI Rank 0: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31728.4
 MPI Rank 0: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689726s
@ -1170,7 +1170,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2063s; samplesPerSecond = 12411.0
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1808s; samplesPerSecond = 14158.0
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15049.2
@ -1184,7 +1184,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3534s; samplesPerSecond = 28972.3
 MPI Rank 1: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31731.1
 MPI Rank 1: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.690282s
@ -1723,7 +1723,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2089s; samplesPerSecond = 12254.0
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1802s; samplesPerSecond = 14210.1
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15046.4
@ -1737,7 +1737,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3569s; samplesPerSecond = 28689.5
 MPI Rank 2: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3228s; samplesPerSecond = 31727.1
 MPI Rank 2: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689913s
--- a/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/Parallel1BitQuantization/testcases.yml
@ -34,7 +34,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 1
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 1
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt
@ -622,7 +622,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.080039
 MPI Rank 0: Async gradient aggregation wait time: 9e-06
 MPI Rank 0: Actual gradient aggregation time: 0.025201
@ -670,7 +670,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.011011
 MPI Rank 0: Actual gradient aggregation time: 0.088497
 MPI Rank 0: Async gradient aggregation wait time: 0.026596
@ -687,7 +687,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 0: Actual gradient aggregation time: 0.023009
 MPI Rank 0: Async gradient aggregation wait time: 1e-05
@ -1240,7 +1240,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.092054
 MPI Rank 1: Async gradient aggregation wait time: 0.029108
 MPI Rank 1: Actual gradient aggregation time: 0.053094
@ -1287,7 +1287,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.009871
 MPI Rank 1: Actual gradient aggregation time: 0.084551
 MPI Rank 1: Async gradient aggregation wait time: 0.067075
@ -1303,7 +1303,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.162303
 MPI Rank 1: Actual gradient aggregation time: 0.088365
 MPI Rank 1: Async gradient aggregation wait time: 0.357011
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.124401
 MPI Rank 2: Async gradient aggregation wait time: 0.027767
 MPI Rank 2: Actual gradient aggregation time: 0.053848
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 2: Actual gradient aggregation time: 0.034828
 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.144867
 MPI Rank 2: Actual gradient aggregation time: 0.087324
 MPI Rank 2: Async gradient aggregation wait time: 0.337574
--- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.006881
 MPI Rank 0: Async gradient aggregation wait time: 0.001169
 MPI Rank 0: Actual gradient aggregation time: 0.012812
@ -671,7 +671,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003256
 MPI Rank 0: Actual gradient aggregation time: 0.026681
 MPI Rank 0: Async gradient aggregation wait time: 0.001712
@ -688,7 +688,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.001962
 MPI Rank 0: Actual gradient aggregation time: 0.02659
 MPI Rank 0: Async gradient aggregation wait time: 0.003671
@ -1242,7 +1242,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.017293
 MPI Rank 1: Async gradient aggregation wait time: 0.001855
 MPI Rank 1: Actual gradient aggregation time: 0.011879
@ -1289,7 +1289,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.007435
 MPI Rank 1: Actual gradient aggregation time: 0.028784
 MPI Rank 1: Async gradient aggregation wait time: 0.006185
@ -1305,7 +1305,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.001906
 MPI Rank 1: Actual gradient aggregation time: 0.027016
 MPI Rank 1: Async gradient aggregation wait time: 0.003939
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.014665
 MPI Rank 2: Async gradient aggregation wait time: 0.001294
 MPI Rank 2: Actual gradient aggregation time: 0.011743
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 3e-06
 MPI Rank 2: Actual gradient aggregation time: 0.022531
 MPI Rank 2: Async gradient aggregation wait time: 0.011564
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.015928
 MPI Rank 2: Actual gradient aggregation time: 0.027468
 MPI Rank 2: Async gradient aggregation wait time: 0.001119
--- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
@ -620,7 +620,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.017461
 MPI Rank 0: Async gradient aggregation wait time: 0.004531
 MPI Rank 0: Actual gradient aggregation time: 0.021009
@ -668,7 +668,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
 MPI Rank 0: Actual gradient aggregation time: 0.020512
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
@ -685,7 +685,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 5e-006
 MPI Rank 0: Actual gradient aggregation time: 0.018185
 MPI Rank 0: Async gradient aggregation wait time: 4e-006
@ -1239,7 +1239,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.029656
 MPI Rank 1: Async gradient aggregation wait time: 0.007273
 MPI Rank 1: Actual gradient aggregation time: 0.021183
@ -1286,7 +1286,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 2e-006
 MPI Rank 1: Actual gradient aggregation time: 0.039428
 MPI Rank 1: Async gradient aggregation wait time: 7e-006
@ -1302,7 +1302,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 4e-006
 MPI Rank 1: Actual gradient aggregation time: 0.032424
 MPI Rank 1: Async gradient aggregation wait time: 0.002787
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.032204
 MPI Rank 2: Async gradient aggregation wait time: 0.010081
 MPI Rank 2: Actual gradient aggregation time: 0.021164
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.019786
 MPI Rank 2: Actual gradient aggregation time: 0.040852
 MPI Rank 2: Async gradient aggregation wait time: 0.024007
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.011905
 MPI Rank 2: Actual gradient aggregation time: 0.051704
 MPI Rank 2: Async gradient aggregation wait time: 0.015128
--- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
@ -621,7 +621,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.021385
 MPI Rank 0: Async gradient aggregation wait time: 0.006373
 MPI Rank 0: Actual gradient aggregation time: 0.017647
@ -669,7 +669,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.014735
 MPI Rank 0: Actual gradient aggregation time: 0.03433
 MPI Rank 0: Async gradient aggregation wait time: 0.004733
@ -686,7 +686,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.004776
 MPI Rank 0: Actual gradient aggregation time: 0.028351
 MPI Rank 0: Async gradient aggregation wait time: 0.008151
@ -1241,7 +1241,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.016814
 MPI Rank 1: Async gradient aggregation wait time: 0.004995
 MPI Rank 1: Actual gradient aggregation time: 0.018553
@ -1288,7 +1288,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.010824
 MPI Rank 1: Actual gradient aggregation time: 0.034649
 MPI Rank 1: Async gradient aggregation wait time: 0.018618
@ -1304,7 +1304,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.006331
 MPI Rank 1: Actual gradient aggregation time: 0.028676
 MPI Rank 1: Async gradient aggregation wait time: 0.007827
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.035327
 MPI Rank 2: Async gradient aggregation wait time: 0.00284
 MPI Rank 2: Actual gradient aggregation time: 0.018497
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 1e-006
 MPI Rank 2: Actual gradient aggregation time: 0.016322
 MPI Rank 2: Async gradient aggregation wait time: 0.013477
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.024966
 MPI Rank 2: Actual gradient aggregation time: 0.028835
 MPI Rank 2: Async gradient aggregation wait time: 0.002866
--- a/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml
@ -34,8 +34,8 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 1
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 1
      - distributed reading is ENABLED
      - BufferedAsyncGradientAggregation is ENABLED
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.cpu.txt
@ -611,7 +611,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:57:53: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:57:53:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.56962759 * 640; EvalClassificationError = 0.91093750 * 640; time = 0.2812s; samplesPerSecond = 2275.8
 MPI Rank 0: 08/16/2016 09:57:54:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.33203458 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.2752s; samplesPerSecond = 2325.6
 MPI Rank 0: 08/16/2016 09:57:54:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97802531 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.2728s; samplesPerSecond = 2346.5
@ -654,7 +654,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:58:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.12679700 * 2560; EvalClassificationError = 0.56601563 * 2560; time = 0.8259s; samplesPerSecond = 3099.5
 MPI Rank 0: 08/16/2016 09:58:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.04568504 * 2560; EvalClassificationError = 0.55429688 * 2560; time = 0.8085s; samplesPerSecond = 3166.4
 MPI Rank 0: 08/16/2016 09:58:33:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.02935394 * 2560; EvalClassificationError = 0.54570312 * 2560; time = 0.7974s; samplesPerSecond = 3210.4
@ -672,7 +672,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:05: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:08:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.93108721 * 10240; EvalClassificationError = 0.52958984 * 10240; time = 2.8844s; samplesPerSecond = 3550.1
 MPI Rank 0: 08/16/2016 09:59:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95098710 * 10240; EvalClassificationError = 0.54755859 * 10240; time = 2.8076s; samplesPerSecond = 3647.2
 MPI Rank 0: 08/16/2016 09:59:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94103716 * 20480; EvalClassificationError = 0.53857422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=5.70009s
@ -1237,7 +1237,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:57:53: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:57:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:57:53:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.56962759 * 640; EvalClassificationError = 0.91093750 * 640; time = 0.2811s; samplesPerSecond = 2276.9
 MPI Rank 1: 08/16/2016 09:57:54:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.33203458 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.2753s; samplesPerSecond = 2325.2
 MPI Rank 1: 08/16/2016 09:57:54:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97802531 * 640; EvalClassificationError = 0.86875000 * 640; time = 0.2721s; samplesPerSecond = 2352.1
@ -1278,7 +1278,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:31: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:58:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.12679700 * 2560; EvalClassificationError = 0.56601563 * 2560; time = 0.8264s; samplesPerSecond = 3097.6
 MPI Rank 1: 08/16/2016 09:58:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.04568504 * 2560; EvalClassificationError = 0.55429688 * 2560; time = 0.8080s; samplesPerSecond = 3168.2
 MPI Rank 1: 08/16/2016 09:58:33:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.02935394 * 2560; EvalClassificationError = 0.54570312 * 2560; time = 0.7977s; samplesPerSecond = 3209.3
@ -1295,7 +1295,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:05: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:05: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:08:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.93108721 * 10240; EvalClassificationError = 0.52958984 * 10240; time = 2.8859s; samplesPerSecond = 3548.3
 MPI Rank 1: 08/16/2016 09:59:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95098710 * 10240; EvalClassificationError = 0.54755859 * 10240; time = 2.8075s; samplesPerSecond = 3647.4
 MPI Rank 1: 08/16/2016 09:59:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.94103716 * 20480; EvalClassificationError = 0.53857422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=5.7s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.gpu.txt
@ -612,7 +612,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:43: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:43:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1080s; samplesPerSecond = 5925.3
 MPI Rank 0: 08/16/2016 09:59:43:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1000s; samplesPerSecond = 6397.8
 MPI Rank 0: 08/16/2016 09:59:44:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1013s; samplesPerSecond = 6319.2
@ -655,7 +655,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:49: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1288s; samplesPerSecond = 19871.6
 MPI Rank 0: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664062 * 2560; time = 0.1249s; samplesPerSecond = 20497.2
 MPI Rank 0: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1248s; samplesPerSecond = 20516.8
@ -673,7 +673,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:51:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.2783s; samplesPerSecond = 36788.6
 MPI Rank 0: 08/16/2016 09:59:51:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.2723s; samplesPerSecond = 37610.7
 MPI Rank 0: 08/16/2016 09:59:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.560004s
@ -1239,7 +1239,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:43: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:43: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:43:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1079s; samplesPerSecond = 5931.0
 MPI Rank 1: 08/16/2016 09:59:43:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1000s; samplesPerSecond = 6401.4
 MPI Rank 1: 08/16/2016 09:59:44:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1012s; samplesPerSecond = 6325.5
@ -1280,7 +1280,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:49: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:49: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1286s; samplesPerSecond = 19904.8
 MPI Rank 1: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664062 * 2560; time = 0.1249s; samplesPerSecond = 20504.1
 MPI Rank 1: 08/16/2016 09:59:49:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1247s; samplesPerSecond = 20526.1
@ -1297,7 +1297,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:51:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.2793s; samplesPerSecond = 36660.2
 MPI Rank 1: 08/16/2016 09:59:51:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.2722s; samplesPerSecond = 37620.1
 MPI Rank 1: 08/16/2016 09:59:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.559871s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.cpu.txt
@ -610,7 +610,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:03: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731190 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.1102s; samplesPerSecond = 5809.0
 MPI Rank 0: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208878 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.1297s; samplesPerSecond = 4932.6
 MPI Rank 0: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319840 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.1064s; samplesPerSecond = 6015.2
@ -653,7 +653,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:11: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09963072 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2349s; samplesPerSecond = 10898.6
 MPI Rank 0: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412622 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.2179s; samplesPerSecond = 11749.9
 MPI Rank 0: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477550 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.2211s; samplesPerSecond = 11580.5
@ -671,7 +671,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:16: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:17:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951347 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.7135s; samplesPerSecond = 14352.0
 MPI Rank 0: 08/16/2016 03:02:17:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082770 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.6161s; samplesPerSecond = 16621.4
 MPI Rank 0: 08/16/2016 03:02:17: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92017059 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.3412s
@ -1237,7 +1237,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:03: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:03: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731190 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.1103s; samplesPerSecond = 5804.6
 MPI Rank 1: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208878 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.1297s; samplesPerSecond = 4935.8
 MPI Rank 1: 08/16/2016 03:02:03:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319840 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.1064s; samplesPerSecond = 6015.3
@ -1278,7 +1278,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:11: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:11: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09963072 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.2348s; samplesPerSecond = 10902.3
 MPI Rank 1: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412622 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.2178s; samplesPerSecond = 11753.0
 MPI Rank 1: 08/16/2016 03:02:11:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477550 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.2209s; samplesPerSecond = 11587.9
@ -1295,7 +1295,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:16: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:16: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:17:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951347 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.7153s; samplesPerSecond = 14315.2
 MPI Rank 1: 08/16/2016 03:02:17:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082770 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.6162s; samplesPerSecond = 16618.6
 MPI Rank 1: 08/16/2016 03:02:17: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92017059 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.34119s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/baseline.windows.gpu.txt
@ -611,7 +611,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:36: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1600s; samplesPerSecond = 3999.8
 MPI Rank 0: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1186s; samplesPerSecond = 5394.3
 MPI Rank 0: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1100s; samplesPerSecond = 5815.7
@ -654,7 +654,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:43: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1674s; samplesPerSecond = 15289.7
 MPI Rank 0: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.1479s; samplesPerSecond = 17309.5
 MPI Rank 0: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1413s; samplesPerSecond = 18120.8
@ -672,7 +672,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:45: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 2, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:46:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.3479s; samplesPerSecond = 29432.6
 MPI Rank 0: 08/16/2016 03:02:46:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.3252s; samplesPerSecond = 31484.1
 MPI Rank 0: 08/16/2016 03:02:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.684367s
@ -1239,7 +1239,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:36: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:36: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.62512789 * 640; EvalClassificationError = 0.94062500 * 640; time = 0.1549s; samplesPerSecond = 4132.8
 MPI Rank 1: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.35619366 * 640; EvalClassificationError = 0.92343750 * 640; time = 0.1179s; samplesPerSecond = 5427.5
 MPI Rank 1: 08/16/2016 03:02:36:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97911998 * 640; EvalClassificationError = 0.89531250 * 640; time = 0.1100s; samplesPerSecond = 5817.1
@ -1280,7 +1280,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:43: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:43: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.13894071 * 2560; EvalClassificationError = 0.56992188 * 2560; time = 0.1664s; samplesPerSecond = 15385.0
 MPI Rank 1: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.06106261 * 2560; EvalClassificationError = 0.55664063 * 2560; time = 0.1471s; samplesPerSecond = 17399.5
 MPI Rank 1: 08/16/2016 03:02:43:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.04459475 * 2560; EvalClassificationError = 0.55039063 * 2560; time = 0.1413s; samplesPerSecond = 18123.8
@ -1297,7 +1297,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:45: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 2, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 2, NumGradientBits = 64), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:45: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 2, numGradientBits = 64), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:46:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.94336419 * 10240; EvalClassificationError = 0.53056641 * 10240; time = 0.3456s; samplesPerSecond = 29626.5
 MPI Rank 1: 08/16/2016 03:02:46:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96525554 * 10240; EvalClassificationError = 0.54873047 * 10240; time = 0.3253s; samplesPerSecond = 31482.2
 MPI Rank 1: 08/16/2016 03:02:46: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95430987 * 20480; EvalClassificationError = 0.53964844 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.684319s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelCrossValidation/testcases.yml
@ -43,7 +43,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 2
-      - NumGradientBits = 64
+      - myRank = {{integer}}
+      - numNodes = 2
+      - numGradientBits = 64
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.cpu.txt
@ -571,7 +571,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2660s; samplesPerSecond = 2405.9
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1966s; samplesPerSecond = 3255.6
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4635s; samplesPerSecond = 1380.7
@ -610,7 +610,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5967s; samplesPerSecond = 4290.2
 MPI Rank 0: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3821s; samplesPerSecond = 6699.7
 MPI Rank 0: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6943s; samplesPerSecond = 3686.9
@ -625,7 +625,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9884s; samplesPerSecond = 10360.6
 MPI Rank 0: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.7
 MPI Rank 0: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39916s
@ -1119,7 +1119,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2613s; samplesPerSecond = 2449.3
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.6
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4674s; samplesPerSecond = 1369.3
@ -1157,7 +1157,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5922s; samplesPerSecond = 4323.0
 MPI Rank 1: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3859s; samplesPerSecond = 6634.3
 MPI Rank 1: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6904s; samplesPerSecond = 3707.8
@ -1171,7 +1171,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9880s; samplesPerSecond = 10364.1
 MPI Rank 1: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.5
 MPI Rank 1: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39895s
@ -1664,7 +1664,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2618s; samplesPerSecond = 2444.4
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.9
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4698s; samplesPerSecond = 1362.4
@ -1702,7 +1702,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5955s; samplesPerSecond = 4298.6
 MPI Rank 2: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3830s; samplesPerSecond = 6684.6
 MPI Rank 2: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6922s; samplesPerSecond = 3698.2
@ -1716,7 +1716,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9897s; samplesPerSecond = 10346.1
 MPI Rank 2: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4041s; samplesPerSecond = 7292.7
 MPI Rank 2: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39757s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.gpu.txt
@ -572,7 +572,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0693s; samplesPerSecond = 9238.4
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0567s; samplesPerSecond = 11281.5
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0566s; samplesPerSecond = 11312.8
@ -611,7 +611,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41819.8
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0599s; samplesPerSecond = 42761.5
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42762.2
@ -626,7 +626,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0894s; samplesPerSecond = 114526.0
 MPI Rank 0: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122653.8
 MPI Rank 0: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179159s
@ -1121,7 +1121,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0695s; samplesPerSecond = 9204.8
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0563s; samplesPerSecond = 11359.2
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0567s; samplesPerSecond = 11286.5
@ -1159,7 +1159,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0614s; samplesPerSecond = 41699.9
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42846.6
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42717.2
@ -1173,7 +1173,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0900s; samplesPerSecond = 113825.8
 MPI Rank 1: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0832s; samplesPerSecond = 123133.2
 MPI Rank 1: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179295s
@ -1667,7 +1667,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0697s; samplesPerSecond = 9180.2
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0565s; samplesPerSecond = 11323.4
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0568s; samplesPerSecond = 11270.0
@ -1705,7 +1705,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41826.0
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42906.2
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0601s; samplesPerSecond = 42627.6
@ -1719,7 +1719,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0899s; samplesPerSecond = 113873.9
 MPI Rank 2: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122592.2
 MPI Rank 2: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179288s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.cpu.txt
@ -569,7 +569,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7677.6
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0883s; samplesPerSecond = 7250.5
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0900s; samplesPerSecond = 7107.4
@ -608,7 +608,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1410s; samplesPerSecond = 18159.2
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1391s; samplesPerSecond = 18410.2
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19084.0
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3803s; samplesPerSecond = 26929.2
 MPI Rank 0: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28721.0
 MPI Rank 0: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746914s
@ -1118,7 +1118,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7671.1
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7260.9
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7105.9
@ -1156,7 +1156,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1412s; samplesPerSecond = 18128.6
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18430.0
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1342s; samplesPerSecond = 19081.7
@ -1170,7 +1170,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3812s; samplesPerSecond = 26864.6
 MPI Rank 1: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3564s; samplesPerSecond = 28728.9
 MPI Rank 1: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746915s
@ -1664,7 +1664,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0829s; samplesPerSecond = 7722.7
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7266.9
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7102.3
@ -1702,7 +1702,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1413s; samplesPerSecond = 18119.0
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18436.9
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19089.9
@ -1716,7 +1716,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3813s; samplesPerSecond = 26856.3
 MPI Rank 2: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28725.1
 MPI Rank 2: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746861s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/baseline.windows.gpu.txt
@ -570,7 +570,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1000s; samplesPerSecond = 6400.3
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0738s; samplesPerSecond = 8674.2
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8396.9
@ -609,7 +609,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1045s; samplesPerSecond = 24493.4
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0862s; samplesPerSecond = 29707.7
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0768s; samplesPerSecond = 33337.7
@ -624,7 +624,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1518s; samplesPerSecond = 67460.3
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1251s; samplesPerSecond = 81832.3
 MPI Rank 0: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287003s
@ -1120,7 +1120,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0996s; samplesPerSecond = 6427.7
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0742s; samplesPerSecond = 8629.6
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8393.6
@ -1158,7 +1158,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1047s; samplesPerSecond = 24439.6
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0858s; samplesPerSecond = 29832.0
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0767s; samplesPerSecond = 33361.1
@ -1172,7 +1172,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1520s; samplesPerSecond = 67362.2
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81779.3
 MPI Rank 1: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287153s
@ -1667,7 +1667,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0986s; samplesPerSecond = 6490.4
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0740s; samplesPerSecond = 8642.9
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0763s; samplesPerSecond = 8383.7
@ -1705,7 +1705,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1034s; samplesPerSecond = 24758.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0861s; samplesPerSecond = 29745.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0769s; samplesPerSecond = 33306.0
@ -1719,7 +1719,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1506s; samplesPerSecond = 67980.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81781.3
 MPI Rank 2: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.286844s
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantization/testcases.yml
@ -34,7 +34,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 32
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 32
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt
@ -622,7 +622,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.008473
 MPI Rank 0: Async gradient aggregation wait time: 0.00554
 MPI Rank 0: Actual gradient aggregation time: 0.020395
@ -670,7 +670,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 0: Actual gradient aggregation time: 0.009207
 MPI Rank 0: Async gradient aggregation wait time: 9e-06
@ -687,7 +687,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.0046
 MPI Rank 0: Actual gradient aggregation time: 0.069203
 MPI Rank 0: Async gradient aggregation wait time: 0.041271
@ -1240,7 +1240,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.063039
 MPI Rank 1: Async gradient aggregation wait time: 0.022678
 MPI Rank 1: Actual gradient aggregation time: 0.025025
@ -1287,7 +1287,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.142203
 MPI Rank 1: Actual gradient aggregation time: 0.157984
 MPI Rank 1: Async gradient aggregation wait time: 0.179014
@ -1303,7 +1303,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.04561
 MPI Rank 1: Actual gradient aggregation time: 0.077514
 MPI Rank 1: Async gradient aggregation wait time: 0.069416
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.041047
 MPI Rank 2: Async gradient aggregation wait time: 8e-06
 MPI Rank 2: Actual gradient aggregation time: 0.004986
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
 MPI Rank 2: Actual gradient aggregation time: 0.149714
 MPI Rank 2: Async gradient aggregation wait time: 0.120817
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
 MPI Rank 2: Actual gradient aggregation time: 0.002705
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.01227
 MPI Rank 0: Async gradient aggregation wait time: 0.00776
 MPI Rank 0: Actual gradient aggregation time: 0.011351
@ -671,7 +671,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.000306
 MPI Rank 0: Actual gradient aggregation time: 0.017813
 MPI Rank 0: Async gradient aggregation wait time: 0.003066
@ -688,7 +688,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003211
 MPI Rank 0: Actual gradient aggregation time: 0.026824
 MPI Rank 0: Async gradient aggregation wait time: 0.002719
@ -1242,7 +1242,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.0237
 MPI Rank 1: Async gradient aggregation wait time: 0.00528
 MPI Rank 1: Actual gradient aggregation time: 0.011811
@ -1289,7 +1289,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.013339
 MPI Rank 1: Actual gradient aggregation time: 0.032552
 MPI Rank 1: Async gradient aggregation wait time: 0.007984
@ -1305,7 +1305,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.002841
 MPI Rank 1: Actual gradient aggregation time: 0.027316
 MPI Rank 1: Async gradient aggregation wait time: 0.002301
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.028506
 MPI Rank 2: Async gradient aggregation wait time: 0.004749
 MPI Rank 2: Actual gradient aggregation time: 0.011317
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.003625
 MPI Rank 2: Actual gradient aggregation time: 0.032563
 MPI Rank 2: Async gradient aggregation wait time: 0.012159
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.016234
 MPI Rank 2: Actual gradient aggregation time: 0.026881
 MPI Rank 2: Async gradient aggregation wait time: 0.005475
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
@ -617,7 +617,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.010875
 MPI Rank 0: Async gradient aggregation wait time: 0.046041
 MPI Rank 0: Actual gradient aggregation time: 0.070247
@ -665,7 +665,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 2e-006
 MPI Rank 0: Actual gradient aggregation time: 0.106492
 MPI Rank 0: Async gradient aggregation wait time: 2e-006
@ -682,7 +682,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
 MPI Rank 0: Actual gradient aggregation time: 0.010023
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
@ -1233,7 +1233,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.033393
 MPI Rank 1: Async gradient aggregation wait time: 0.005092
 MPI Rank 1: Actual gradient aggregation time: 0.070288
@ -1280,7 +1280,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.056538
 MPI Rank 1: Actual gradient aggregation time: 0.181181
 MPI Rank 1: Async gradient aggregation wait time: 3e-006
@ -1296,7 +1296,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 2e-006
 MPI Rank 1: Actual gradient aggregation time: 0.028628
 MPI Rank 1: Async gradient aggregation wait time: 0.062352
@ -1846,7 +1846,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.040872
 MPI Rank 2: Async gradient aggregation wait time: 0.04797
 MPI Rank 2: Actual gradient aggregation time: 0.070448
@ -1893,7 +1893,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.103418
 MPI Rank 2: Actual gradient aggregation time: 0.168332
 MPI Rank 2: Async gradient aggregation wait time: 0.014615
@ -1909,7 +1909,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 3e-006
 MPI Rank 2: Actual gradient aggregation time: 0.092817
 MPI Rank 2: Async gradient aggregation wait time: 0.095403
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
@ -618,7 +618,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.01782
 MPI Rank 0: Async gradient aggregation wait time: 0.005297
 MPI Rank 0: Actual gradient aggregation time: 0.025182
@ -666,7 +666,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003725
 MPI Rank 0: Actual gradient aggregation time: 0.069103
 MPI Rank 0: Async gradient aggregation wait time: 0.001861
@ -683,7 +683,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.033541
 MPI Rank 0: Actual gradient aggregation time: 0.07365
 MPI Rank 0: Async gradient aggregation wait time: 0.011228
@ -1235,7 +1235,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.020729
 MPI Rank 1: Async gradient aggregation wait time: 0.009212
 MPI Rank 1: Actual gradient aggregation time: 0.025214
@ -1282,7 +1282,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.061455
 MPI Rank 1: Actual gradient aggregation time: 0.070776
 MPI Rank 1: Async gradient aggregation wait time: 0.04993
@ -1298,7 +1298,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.006886
 MPI Rank 1: Actual gradient aggregation time: 0.071953
 MPI Rank 1: Async gradient aggregation wait time: 0.012085
@ -1849,7 +1849,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.03301
 MPI Rank 2: Async gradient aggregation wait time: 0.004502
 MPI Rank 2: Actual gradient aggregation time: 0.025447
@ -1896,7 +1896,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.001929
 MPI Rank 2: Actual gradient aggregation time: 0.069767
 MPI Rank 2: Async gradient aggregation wait time: 0.051731
@ -1912,7 +1912,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.005577
 MPI Rank 2: Actual gradient aggregation time: 0.072623
 MPI Rank 2: Async gradient aggregation wait time: 0.000919
--- a/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml
+++ b/Tests/EndToEndTests/Speech/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml
@ -34,8 +34,8 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 64
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 64
      - distributed reading is ENABLED
      - BufferedAsyncGradientAggregation is ENABLED
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.cpu.txt
@ -616,7 +616,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5690s; samplesPerSecond = 4499.5
 MPI Rank 0: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7981s; samplesPerSecond = 3207.4
 MPI Rank 0: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4440s; samplesPerSecond = 5765.9
@ -631,7 +631,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3527s; samplesPerSecond = 7570.1
 MPI Rank 0: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6866s; samplesPerSecond = 6071.4
 MPI Rank 0: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.0565s
@ -1169,7 +1169,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5643s; samplesPerSecond = 4537.0
 MPI Rank 1: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7961s; samplesPerSecond = 3215.7
 MPI Rank 1: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4504s; samplesPerSecond = 5684.3
@ -1183,7 +1183,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3515s; samplesPerSecond = 7576.5
 MPI Rank 1: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6863s; samplesPerSecond = 6072.6
 MPI Rank 1: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05591s
@ -1720,7 +1720,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:07:48: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:07:48: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:07:49:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.17725069 * 2560; EvalClassificationError = 0.59921875 * 2560; time = 0.5710s; samplesPerSecond = 4483.5
 MPI Rank 2: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.13334089 * 2560; EvalClassificationError = 0.56835938 * 2560; time = 0.7894s; samplesPerSecond = 3242.9
 MPI Rank 2: 08/16/2016 10:07:50:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07967076 * 2560; EvalClassificationError = 0.56328125 * 2560; time = 0.4520s; samplesPerSecond = 5664.3
@ -1734,7 +1734,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:07:53: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:07:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:07:54:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97049696 * 10240; EvalClassificationError = 0.54658203 * 10240; time = 1.3532s; samplesPerSecond = 7567.4
 MPI Rank 2: 08/16/2016 10:07:56:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.96995673 * 10240; EvalClassificationError = 0.54746094 * 10240; time = 1.6847s; samplesPerSecond = 6078.4
 MPI Rank 2: 08/16/2016 10:07:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.97022685 * 20480; EvalClassificationError = 0.54702148 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=3.05602s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.gpu.txt
@ -617,7 +617,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1484s; samplesPerSecond = 17245.9
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17868.6
 MPI Rank 0: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18039.2
@ -632,7 +632,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2875s; samplesPerSecond = 35614.4
 MPI Rank 0: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36224.7
 MPI Rank 0: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.577049s
@ -1171,7 +1171,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1487s; samplesPerSecond = 17211.0
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1433s; samplesPerSecond = 17870.0
 MPI Rank 1: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18038.8
@ -1185,7 +1185,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2878s; samplesPerSecond = 35576.6
 MPI Rank 1: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2827s; samplesPerSecond = 36218.6
 MPI Rank 1: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.576897s
@ -1723,7 +1723,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:08:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:08:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.1485s; samplesPerSecond = 17237.9
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1432s; samplesPerSecond = 17878.8
 MPI Rank 2: 08/16/2016 10:08:09:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1419s; samplesPerSecond = 18040.0
@ -1737,7 +1737,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:08:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:08:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.2880s; samplesPerSecond = 35554.3
 MPI Rank 2: 08/16/2016 10:08:10:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.2824s; samplesPerSecond = 36264.2
 MPI Rank 2: 08/16/2016 10:08:10: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.57713s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.cpu.txt
@ -614,7 +614,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3458s; samplesPerSecond = 7402.6
 MPI Rank 0: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3452s; samplesPerSecond = 7416.2
 MPI Rank 0: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.5
@ -629,7 +629,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.6987s; samplesPerSecond = 14654.8
 MPI Rank 0: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6563s; samplesPerSecond = 15601.8
 MPI Rank 0: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38257s
@ -1168,7 +1168,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3464s; samplesPerSecond = 7390.3
 MPI Rank 1: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.0
 MPI Rank 1: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3528s; samplesPerSecond = 7255.6
@ -1182,7 +1182,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7010s; samplesPerSecond = 14607.7
 MPI Rank 1: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6562s; samplesPerSecond = 15604.6
 MPI Rank 1: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38447s
@ -1720,7 +1720,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:07: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:07: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:07:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.15042048 * 2560; EvalClassificationError = 0.58867187 * 2560; time = 0.3462s; samplesPerSecond = 7394.9
 MPI Rank 2: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.11694314 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.3453s; samplesPerSecond = 7413.9
 MPI Rank 2: 08/16/2016 03:19:08:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.05986597 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3527s; samplesPerSecond = 7258.6
@ -1734,7 +1734,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95622782 * 10240; EvalClassificationError = 0.54609375 * 10240; time = 0.7014s; samplesPerSecond = 14599.6
 MPI Rank 2: 08/16/2016 03:19:11:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.95081435 * 10240; EvalClassificationError = 0.54355469 * 10240; time = 0.6564s; samplesPerSecond = 15599.2
 MPI Rank 2: 08/16/2016 03:19:11: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.95352108 * 20480; EvalClassificationError = 0.54482422 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=1.38407s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/baseline.windows.gpu.txt
@ -615,7 +615,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2084s; samplesPerSecond = 12286.1
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1804s; samplesPerSecond = 14191.5
 MPI Rank 0: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1704s; samplesPerSecond = 15022.6
@ -630,7 +630,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3564s; samplesPerSecond = 28732.2
 MPI Rank 0: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31728.4
 MPI Rank 0: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689726s
@ -1170,7 +1170,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2063s; samplesPerSecond = 12411.0
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1808s; samplesPerSecond = 14158.0
 MPI Rank 1: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15049.2
@ -1184,7 +1184,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3534s; samplesPerSecond = 28972.3
 MPI Rank 1: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3227s; samplesPerSecond = 31731.1
 MPI Rank 1: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.690282s
@ -1723,7 +1723,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:31: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:31: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.19429672 * 2560; EvalClassificationError = 0.60039062 * 2560; time = 0.2089s; samplesPerSecond = 12254.0
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.15577544 * 2560; EvalClassificationError = 0.57070312 * 2560; time = 0.1802s; samplesPerSecond = 14210.1
 MPI Rank 2: 08/16/2016 03:19:32:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.09655267 * 2560; EvalClassificationError = 0.56289062 * 2560; time = 0.1701s; samplesPerSecond = 15046.4
@ -1737,7 +1737,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:19:33: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:19:33: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:19:33:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.95876979 * 10240; EvalClassificationError = 0.53154297 * 10240; time = 0.3569s; samplesPerSecond = 28689.5
 MPI Rank 2: 08/16/2016 03:19:34:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.97868556 * 10240; EvalClassificationError = 0.55019531 * 10240; time = 0.3228s; samplesPerSecond = 31727.1
 MPI Rank 2: 08/16/2016 03:19:34: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.96872768 * 20480; EvalClassificationError = 0.54086914 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.689913s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/Parallel1BitQuantization/testcases.yml
@ -34,7 +34,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 1
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 1
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.cpu.txt
@ -622,7 +622,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.080039
 MPI Rank 0: Async gradient aggregation wait time: 9e-06
 MPI Rank 0: Actual gradient aggregation time: 0.025201
@ -670,7 +670,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.011011
 MPI Rank 0: Actual gradient aggregation time: 0.088497
 MPI Rank 0: Async gradient aggregation wait time: 0.026596
@ -687,7 +687,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 0: Actual gradient aggregation time: 0.023009
 MPI Rank 0: Async gradient aggregation wait time: 1e-05
@ -1240,7 +1240,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.092054
 MPI Rank 1: Async gradient aggregation wait time: 0.029108
 MPI Rank 1: Actual gradient aggregation time: 0.053094
@ -1287,7 +1287,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.009871
 MPI Rank 1: Actual gradient aggregation time: 0.084551
 MPI Rank 1: Async gradient aggregation wait time: 0.067075
@ -1303,7 +1303,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.162303
 MPI Rank 1: Actual gradient aggregation time: 0.088365
 MPI Rank 1: Async gradient aggregation wait time: 0.357011
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:14: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:14: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.124401
 MPI Rank 2: Async gradient aggregation wait time: 0.027767
 MPI Rank 2: Actual gradient aggregation time: 0.053848
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:19: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 2: Actual gradient aggregation time: 0.034828
 MPI Rank 2: Async gradient aggregation wait time: 1.1e-05
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:21: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.144867
 MPI Rank 2: Actual gradient aggregation time: 0.087324
 MPI Rank 2: Async gradient aggregation wait time: 0.337574
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.gpu.txt
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.006881
 MPI Rank 0: Async gradient aggregation wait time: 0.001169
 MPI Rank 0: Actual gradient aggregation time: 0.012812
@ -671,7 +671,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003256
 MPI Rank 0: Actual gradient aggregation time: 0.026681
 MPI Rank 0: Async gradient aggregation wait time: 0.001712
@ -688,7 +688,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.001962
 MPI Rank 0: Actual gradient aggregation time: 0.02659
 MPI Rank 0: Async gradient aggregation wait time: 0.003671
@ -1242,7 +1242,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.017293
 MPI Rank 1: Async gradient aggregation wait time: 0.001855
 MPI Rank 1: Actual gradient aggregation time: 0.011879
@ -1289,7 +1289,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.007435
 MPI Rank 1: Actual gradient aggregation time: 0.028784
 MPI Rank 1: Async gradient aggregation wait time: 0.006185
@ -1305,7 +1305,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.001906
 MPI Rank 1: Actual gradient aggregation time: 0.027016
 MPI Rank 1: Async gradient aggregation wait time: 0.003939
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:37: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:37: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.014665
 MPI Rank 2: Async gradient aggregation wait time: 0.001294
 MPI Rank 2: Actual gradient aggregation time: 0.011743
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:38: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:38: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 3e-06
 MPI Rank 2: Actual gradient aggregation time: 0.022531
 MPI Rank 2: Async gradient aggregation wait time: 0.011564
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:09:39: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:09:39: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.015928
 MPI Rank 2: Actual gradient aggregation time: 0.027468
 MPI Rank 2: Async gradient aggregation wait time: 0.001119
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
@ -620,7 +620,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.017461
 MPI Rank 0: Async gradient aggregation wait time: 0.004531
 MPI Rank 0: Actual gradient aggregation time: 0.021009
@ -668,7 +668,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
 MPI Rank 0: Actual gradient aggregation time: 0.020512
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
@ -685,7 +685,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 5e-006
 MPI Rank 0: Actual gradient aggregation time: 0.018185
 MPI Rank 0: Async gradient aggregation wait time: 4e-006
@ -1239,7 +1239,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.029656
 MPI Rank 1: Async gradient aggregation wait time: 0.007273
 MPI Rank 1: Actual gradient aggregation time: 0.021183
@ -1286,7 +1286,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 2e-006
 MPI Rank 1: Actual gradient aggregation time: 0.039428
 MPI Rank 1: Async gradient aggregation wait time: 7e-006
@ -1302,7 +1302,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 4e-006
 MPI Rank 1: Actual gradient aggregation time: 0.032424
 MPI Rank 1: Async gradient aggregation wait time: 0.002787
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:28: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:28: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.032204
 MPI Rank 2: Async gradient aggregation wait time: 0.010081
 MPI Rank 2: Actual gradient aggregation time: 0.021164
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:30: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:30: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.019786
 MPI Rank 2: Actual gradient aggregation time: 0.040852
 MPI Rank 2: Async gradient aggregation wait time: 0.024007
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:32: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:32: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.011905
 MPI Rank 2: Actual gradient aggregation time: 0.051704
 MPI Rank 2: Async gradient aggregation wait time: 0.015128
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
@ -621,7 +621,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.021385
 MPI Rank 0: Async gradient aggregation wait time: 0.006373
 MPI Rank 0: Actual gradient aggregation time: 0.017647
@ -669,7 +669,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.014735
 MPI Rank 0: Actual gradient aggregation time: 0.03433
 MPI Rank 0: Async gradient aggregation wait time: 0.004733
@ -686,7 +686,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.004776
 MPI Rank 0: Actual gradient aggregation time: 0.028351
 MPI Rank 0: Async gradient aggregation wait time: 0.008151
@ -1241,7 +1241,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.016814
 MPI Rank 1: Async gradient aggregation wait time: 0.004995
 MPI Rank 1: Actual gradient aggregation time: 0.018553
@ -1288,7 +1288,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.010824
 MPI Rank 1: Actual gradient aggregation time: 0.034649
 MPI Rank 1: Async gradient aggregation wait time: 0.018618
@ -1304,7 +1304,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.006331
 MPI Rank 1: Actual gradient aggregation time: 0.028676
 MPI Rank 1: Async gradient aggregation wait time: 0.007827
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:53: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:53: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.035327
 MPI Rank 2: Async gradient aggregation wait time: 0.00284
 MPI Rank 2: Actual gradient aggregation time: 0.018497
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:55: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:55: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 1e-006
 MPI Rank 2: Actual gradient aggregation time: 0.016322
 MPI Rank 2: Async gradient aggregation wait time: 0.013477
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:20:56: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:20:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 1), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.024966
 MPI Rank 2: Actual gradient aggregation time: 0.028835
 MPI Rank 2: Async gradient aggregation wait time: 0.002866
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelBufferedAsyncGradientAggregation/testcases.yml
@ -34,8 +34,8 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 1
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 1
      - distributed reading is ENABLED
      - BufferedAsyncGradientAggregation is ENABLED
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.cpu.txt
@ -571,7 +571,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2660s; samplesPerSecond = 2405.9
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1966s; samplesPerSecond = 3255.6
 MPI Rank 0: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4635s; samplesPerSecond = 1380.7
@ -610,7 +610,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5967s; samplesPerSecond = 4290.2
 MPI Rank 0: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3821s; samplesPerSecond = 6699.7
 MPI Rank 0: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6943s; samplesPerSecond = 3686.9
@ -625,7 +625,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9884s; samplesPerSecond = 10360.6
 MPI Rank 0: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.7
 MPI Rank 0: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39916s
@ -1119,7 +1119,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2613s; samplesPerSecond = 2449.3
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.6
 MPI Rank 1: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4674s; samplesPerSecond = 1369.3
@ -1157,7 +1157,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5922s; samplesPerSecond = 4323.0
 MPI Rank 1: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3859s; samplesPerSecond = 6634.3
 MPI Rank 1: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6904s; samplesPerSecond = 3707.8
@ -1171,7 +1171,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9880s; samplesPerSecond = 10364.1
 MPI Rank 1: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4042s; samplesPerSecond = 7292.5
 MPI Rank 1: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39895s
@ -1664,7 +1664,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:56: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:56: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.62304965 * 640; EvalClassificationError = 0.93437500 * 640; time = 0.2618s; samplesPerSecond = 2444.4
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.38132581 * 640; EvalClassificationError = 0.93125000 * 640; time = 0.1965s; samplesPerSecond = 3256.9
 MPI Rank 2: 08/16/2016 09:59:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98762394 * 640; EvalClassificationError = 0.88593750 * 640; time = 0.4698s; samplesPerSecond = 1362.4
@ -1702,7 +1702,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:02: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.16726831 * 2560; EvalClassificationError = 0.57851562 * 2560; time = 0.5955s; samplesPerSecond = 4298.6
 MPI Rank 2: 08/16/2016 10:00:03:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.08752017 * 2560; EvalClassificationError = 0.56523437 * 2560; time = 0.3830s; samplesPerSecond = 6684.6
 MPI Rank 2: 08/16/2016 10:00:04:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.07134545 * 2560; EvalClassificationError = 0.55585938 * 2560; time = 0.6922s; samplesPerSecond = 3698.2
@ -1716,7 +1716,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:06: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:06: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:07:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.97114275 * 10240; EvalClassificationError = 0.53769531 * 10240; time = 0.9897s; samplesPerSecond = 10346.1
 MPI Rank 2: 08/16/2016 10:00:08:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.99390313 * 10240; EvalClassificationError = 0.55712891 * 10240; time = 1.4041s; samplesPerSecond = 7292.7
 MPI Rank 2: 08/16/2016 10:00:08: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.98252294 * 20480; EvalClassificationError = 0.54741211 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=2.39757s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.gpu.txt
@ -572,7 +572,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0693s; samplesPerSecond = 9238.4
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0567s; samplesPerSecond = 11281.5
 MPI Rank 0: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0566s; samplesPerSecond = 11312.8
@ -611,7 +611,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41819.8
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0599s; samplesPerSecond = 42761.5
 MPI Rank 0: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42762.2
@ -626,7 +626,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0894s; samplesPerSecond = 114526.0
 MPI Rank 0: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122653.8
 MPI Rank 0: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179159s
@ -1121,7 +1121,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0695s; samplesPerSecond = 9204.8
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0563s; samplesPerSecond = 11359.2
 MPI Rank 1: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0567s; samplesPerSecond = 11286.5
@ -1159,7 +1159,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0614s; samplesPerSecond = 41699.9
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42846.6
 MPI Rank 1: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0599s; samplesPerSecond = 42717.2
@ -1173,7 +1173,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0900s; samplesPerSecond = 113825.8
 MPI Rank 1: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0832s; samplesPerSecond = 123133.2
 MPI Rank 1: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179295s
@ -1667,7 +1667,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:15: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:15: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.12%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0697s; samplesPerSecond = 9180.2
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0565s; samplesPerSecond = 11323.4
 MPI Rank 2: 08/16/2016 10:00:15:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0568s; samplesPerSecond = 11270.0
@ -1705,7 +1705,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.0612s; samplesPerSecond = 41826.0
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0597s; samplesPerSecond = 42906.2
 MPI Rank 2: 08/16/2016 10:00:17:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0601s; samplesPerSecond = 42627.6
@ -1719,7 +1719,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 10:00:17: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 10:00:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.0899s; samplesPerSecond = 113873.9
 MPI Rank 2: 08/16/2016 10:00:18:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.0835s; samplesPerSecond = 122592.2
 MPI Rank 2: 08/16/2016 10:00:18: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-05; epochTime=0.179288s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.cpu.txt
@ -569,7 +569,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7677.6
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0883s; samplesPerSecond = 7250.5
 MPI Rank 0: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0900s; samplesPerSecond = 7107.4
@ -608,7 +608,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1410s; samplesPerSecond = 18159.2
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1391s; samplesPerSecond = 18410.2
 MPI Rank 0: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19084.0
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3803s; samplesPerSecond = 26929.2
 MPI Rank 0: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28721.0
 MPI Rank 0: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746914s
@ -1118,7 +1118,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0834s; samplesPerSecond = 7671.1
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7260.9
 MPI Rank 1: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7105.9
@ -1156,7 +1156,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1412s; samplesPerSecond = 18128.6
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18430.0
 MPI Rank 1: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1342s; samplesPerSecond = 19081.7
@ -1170,7 +1170,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3812s; samplesPerSecond = 26864.6
 MPI Rank 1: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3564s; samplesPerSecond = 28728.9
 MPI Rank 1: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746915s
@ -1664,7 +1664,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:02:57: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:02:57: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.56731197 * 640; EvalClassificationError = 0.91718750 * 640; time = 0.0829s; samplesPerSecond = 7722.7
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.31208884 * 640; EvalClassificationError = 0.92812500 * 640; time = 0.0881s; samplesPerSecond = 7266.9
 MPI Rank 2: 08/16/2016 03:02:57:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.97319817 * 640; EvalClassificationError = 0.87343750 * 640; time = 0.0901s; samplesPerSecond = 7102.3
@ -1702,7 +1702,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:00: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:00: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.09962837 * 2560; EvalClassificationError = 0.56132812 * 2560; time = 0.1413s; samplesPerSecond = 18119.0
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.02412398 * 2560; EvalClassificationError = 0.55000000 * 2560; time = 0.1389s; samplesPerSecond = 18436.9
 MPI Rank 2: 08/16/2016 03:03:00:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 2.00477328 * 2560; EvalClassificationError = 0.54296875 * 2560; time = 0.1341s; samplesPerSecond = 19089.9
@ -1716,7 +1716,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:01: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:01: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.90951136 * 10240; EvalClassificationError = 0.52617187 * 10240; time = 0.3813s; samplesPerSecond = 26856.3
 MPI Rank 2: 08/16/2016 03:03:02:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.93082558 * 10240; EvalClassificationError = 0.54072266 * 10240; time = 0.3565s; samplesPerSecond = 28725.1
 MPI Rank 2: 08/16/2016 03:03:02: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.92016847 * 20480; EvalClassificationError = 0.53344727 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.746861s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/baseline.windows.gpu.txt
@ -570,7 +570,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 0: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.1000s; samplesPerSecond = 6400.3
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0738s; samplesPerSecond = 8674.2
 MPI Rank 0: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8396.9
@ -609,7 +609,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1045s; samplesPerSecond = 24493.4
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0862s; samplesPerSecond = 29707.7
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0768s; samplesPerSecond = 33337.7
@ -624,7 +624,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1518s; samplesPerSecond = 67460.3
 MPI Rank 0: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1251s; samplesPerSecond = 81832.3
 MPI Rank 0: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287003s
@ -1120,7 +1120,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 1: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0996s; samplesPerSecond = 6427.7
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0742s; samplesPerSecond = 8629.6
 MPI Rank 1: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0762s; samplesPerSecond = 8393.6
@ -1158,7 +1158,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1047s; samplesPerSecond = 24439.6
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0858s; samplesPerSecond = 29832.0
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0767s; samplesPerSecond = 33361.1
@ -1172,7 +1172,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1520s; samplesPerSecond = 67362.2
 MPI Rank 1: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81779.3
 MPI Rank 1: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.287153s
@ -1667,7 +1667,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:17: Starting Epoch 1: learning rate per sample = 0.015625  effective momentum = 0.900000  momentum as time constant = 607.4 samples
 MPI Rank 2: minibatchiterator: epoch 0: frames [0..20480] (first utterance at frame 0), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:17: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[   1-  10, 3.13%]: CrossEntropyWithSoftmax = 4.53638629 * 640; EvalClassificationError = 0.92031250 * 640; time = 0.0986s; samplesPerSecond = 6490.4
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  11-  20, 6.25%]: CrossEntropyWithSoftmax = 4.32517790 * 640; EvalClassificationError = 0.92500000 * 640; time = 0.0740s; samplesPerSecond = 8642.9
 MPI Rank 2: 08/16/2016 03:03:17:  Epoch[ 1 of 3]-Minibatch[  21-  30, 9.38%]: CrossEntropyWithSoftmax = 3.98246287 * 640; EvalClassificationError = 0.87187500 * 640; time = 0.0763s; samplesPerSecond = 8383.7
@ -1705,7 +1705,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:19: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:19: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[   1-  10, 12.50%]: CrossEntropyWithSoftmax = 2.08889863 * 2560; EvalClassificationError = 0.56367188 * 2560; time = 0.1034s; samplesPerSecond = 24758.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  11-  20, 25.00%]: CrossEntropyWithSoftmax = 2.00776218 * 2560; EvalClassificationError = 0.54218750 * 2560; time = 0.0861s; samplesPerSecond = 29745.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 2 of 3]-Minibatch[  21-  30, 37.50%]: CrossEntropyWithSoftmax = 1.99260186 * 2560; EvalClassificationError = 0.54257813 * 2560; time = 0.0769s; samplesPerSecond = 33306.0
@ -1719,7 +1719,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:20: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 32), distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:20: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 32), distributed reading is ENABLED.
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[   1-  10, 50.00%]: CrossEntropyWithSoftmax = 1.89820595 * 10240; EvalClassificationError = 0.52470703 * 10240; time = 0.1506s; samplesPerSecond = 67980.7
 MPI Rank 2: 08/16/2016 03:03:20:  Epoch[ 3 of 3]-Minibatch[  11-  20, 100.00%]: CrossEntropyWithSoftmax = 1.91958075 * 10240; EvalClassificationError = 0.53974609 * 10240; time = 0.1252s; samplesPerSecond = 81781.3
 MPI Rank 2: 08/16/2016 03:03:20: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 1.90889335 * 20480; EvalClassificationError = 0.53222656 * 20480; totalSamplesSeen = 61440; learningRatePerSample = 9.7656251e-005; epochTime=0.286844s
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantization/testcases.yml
@ -34,7 +34,7 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 32
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 32
      - distributed reading is ENABLED
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.cpu.txt
@ -622,7 +622,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.008473
 MPI Rank 0: Async gradient aggregation wait time: 0.00554
 MPI Rank 0: Actual gradient aggregation time: 0.020395
@ -670,7 +670,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 1.1e-05
 MPI Rank 0: Actual gradient aggregation time: 0.009207
 MPI Rank 0: Async gradient aggregation wait time: 9e-06
@ -687,7 +687,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.0046
 MPI Rank 0: Actual gradient aggregation time: 0.069203
 MPI Rank 0: Async gradient aggregation wait time: 0.041271
@ -1240,7 +1240,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.063039
 MPI Rank 1: Async gradient aggregation wait time: 0.022678
 MPI Rank 1: Actual gradient aggregation time: 0.025025
@ -1287,7 +1287,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.142203
 MPI Rank 1: Actual gradient aggregation time: 0.157984
 MPI Rank 1: Async gradient aggregation wait time: 0.179014
@ -1303,7 +1303,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.04561
 MPI Rank 1: Actual gradient aggregation time: 0.077514
 MPI Rank 1: Async gradient aggregation wait time: 0.069416
@ -1855,7 +1855,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:46: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:46: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.041047
 MPI Rank 2: Async gradient aggregation wait time: 8e-06
 MPI Rank 2: Actual gradient aggregation time: 0.004986
@ -1902,7 +1902,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:51: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:51: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
 MPI Rank 2: Actual gradient aggregation time: 0.149714
 MPI Rank 2: Async gradient aggregation wait time: 0.120817
@ -1918,7 +1918,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:58:54: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:58:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
 MPI Rank 2: Actual gradient aggregation time: 0.002705
 MPI Rank 2: Async gradient aggregation wait time: 9e-06
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.gpu.txt
@ -623,7 +623,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.01227
 MPI Rank 0: Async gradient aggregation wait time: 0.00776
 MPI Rank 0: Actual gradient aggregation time: 0.011351
@ -671,7 +671,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.000306
 MPI Rank 0: Actual gradient aggregation time: 0.017813
 MPI Rank 0: Async gradient aggregation wait time: 0.003066
@ -688,7 +688,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003211
 MPI Rank 0: Actual gradient aggregation time: 0.026824
 MPI Rank 0: Async gradient aggregation wait time: 0.002719
@ -1242,7 +1242,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.0237
 MPI Rank 1: Async gradient aggregation wait time: 0.00528
 MPI Rank 1: Actual gradient aggregation time: 0.011811
@ -1289,7 +1289,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.013339
 MPI Rank 1: Actual gradient aggregation time: 0.032552
 MPI Rank 1: Async gradient aggregation wait time: 0.007984
@ -1305,7 +1305,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.002841
 MPI Rank 1: Actual gradient aggregation time: 0.027316
 MPI Rank 1: Async gradient aggregation wait time: 0.002301
@ -1858,7 +1858,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:09: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:09: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.028506
 MPI Rank 2: Async gradient aggregation wait time: 0.004749
 MPI Rank 2: Actual gradient aggregation time: 0.011317
@ -1905,7 +1905,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.003625
 MPI Rank 2: Actual gradient aggregation time: 0.032563
 MPI Rank 2: Async gradient aggregation wait time: 0.012159
@ -1921,7 +1921,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 09:59:10: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 09:59:10: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.016234
 MPI Rank 2: Actual gradient aggregation time: 0.026881
 MPI Rank 2: Async gradient aggregation wait time: 0.005475
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.cpu.txt
@ -617,7 +617,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.010875
 MPI Rank 0: Async gradient aggregation wait time: 0.046041
 MPI Rank 0: Actual gradient aggregation time: 0.070247
@ -665,7 +665,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 2e-006
 MPI Rank 0: Actual gradient aggregation time: 0.106492
 MPI Rank 0: Async gradient aggregation wait time: 2e-006
@ -682,7 +682,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
 MPI Rank 0: Actual gradient aggregation time: 0.010023
 MPI Rank 0: Async gradient aggregation wait time: 3e-006
@ -1233,7 +1233,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.033393
 MPI Rank 1: Async gradient aggregation wait time: 0.005092
 MPI Rank 1: Actual gradient aggregation time: 0.070288
@ -1280,7 +1280,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.056538
 MPI Rank 1: Actual gradient aggregation time: 0.181181
 MPI Rank 1: Async gradient aggregation wait time: 3e-006
@ -1296,7 +1296,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 2e-006
 MPI Rank 1: Actual gradient aggregation time: 0.028628
 MPI Rank 1: Async gradient aggregation wait time: 0.062352
@ -1846,7 +1846,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:54: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:54: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.040872
 MPI Rank 2: Async gradient aggregation wait time: 0.04797
 MPI Rank 2: Actual gradient aggregation time: 0.070448
@ -1893,7 +1893,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:03:59: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:03:59: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.103418
 MPI Rank 2: Actual gradient aggregation time: 0.168332
 MPI Rank 2: Async gradient aggregation wait time: 0.014615
@ -1909,7 +1909,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:02: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:02: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 3e-006
 MPI Rank 2: Actual gradient aggregation time: 0.092817
 MPI Rank 2: Async gradient aggregation wait time: 0.095403
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/baseline.windows.gpu.txt
@ -618,7 +618,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 0: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Actual gradient aggregation time: 0.01782
 MPI Rank 0: Async gradient aggregation wait time: 0.005297
 MPI Rank 0: Actual gradient aggregation time: 0.025182
@ -666,7 +666,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.003725
 MPI Rank 0: Actual gradient aggregation time: 0.069103
 MPI Rank 0: Async gradient aggregation wait time: 0.001861
@ -683,7 +683,7 @@ MPI Rank 0:
 MPI Rank 0: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 0: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 0 of 3, with 1 datapasses
 MPI Rank 0: 
-MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 0: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 0, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 0: Async gradient aggregation wait time: 0.033541
 MPI Rank 0: Actual gradient aggregation time: 0.07365
 MPI Rank 0: Async gradient aggregation wait time: 0.011228
@ -1235,7 +1235,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 1: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Actual gradient aggregation time: 0.020729
 MPI Rank 1: Async gradient aggregation wait time: 0.009212
 MPI Rank 1: Actual gradient aggregation time: 0.025214
@ -1282,7 +1282,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.061455
 MPI Rank 1: Actual gradient aggregation time: 0.070776
 MPI Rank 1: Async gradient aggregation wait time: 0.04993
@ -1298,7 +1298,7 @@ MPI Rank 1:
 MPI Rank 1: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 1: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 1 of 3, with 1 datapasses
 MPI Rank 1: 
-MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 1, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 1: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 1, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 1: Async gradient aggregation wait time: 0.006886
 MPI Rank 1: Actual gradient aggregation time: 0.071953
 MPI Rank 1: Async gradient aggregation wait time: 0.012085
@ -1849,7 +1849,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:21: Starting Epoch 2: learning rate per sample = 0.001953  effective momentum = 0.656119  momentum as time constant = 607.5 samples
 MPI Rank 2: minibatchiterator: epoch 1: frames [20480..40960] (first utterance at frame 20480), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:21: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Actual gradient aggregation time: 0.03301
 MPI Rank 2: Async gradient aggregation wait time: 0.004502
 MPI Rank 2: Actual gradient aggregation time: 0.025447
@ -1896,7 +1896,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:23: Starting Epoch 3: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 2: frames [40960..61440] (first utterance at frame 40960), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:23: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.001929
 MPI Rank 2: Actual gradient aggregation time: 0.069767
 MPI Rank 2: Async gradient aggregation wait time: 0.051731
@ -1912,7 +1912,7 @@ MPI Rank 2:
 MPI Rank 2: 08/16/2016 03:04:25: Starting Epoch 4: learning rate per sample = 0.000098  effective momentum = 0.656119  momentum as time constant = 2429.9 samples
 MPI Rank 2: minibatchiterator: epoch 3: frames [61440..81920] (first utterance at frame 61440), data subset 2 of 3, with 1 datapasses
 MPI Rank 2: 
-MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (MyRank = 2, NumNodes = 3, NumGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
+MPI Rank 2: 08/16/2016 03:04:25: Starting minibatch loop, DataParallelSGD training (myRank = 2, numNodes = 3, numGradientBits = 64), BufferedAsyncGradientAggregation is ENABLED, distributed reading is ENABLED.
 MPI Rank 2: Async gradient aggregation wait time: 0.005577
 MPI Rank 2: Actual gradient aggregation time: 0.072623
 MPI Rank 2: Async gradient aggregation wait time: 0.000919
--- a/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml
+++ b/Tests/EndToEndTests/Speech/HTKDeserializers/DNN/ParallelNoQuantizationBufferedAsyncGradientAggregation/testcases.yml
@ -34,8 +34,8 @@ testCases:
      - ^MPI Rank {{integer}}
      - Starting minibatch loop
      - DataParallelSGD training
-      - MyRank = {{integer}}
-      - NumNodes = 3
-      - NumGradientBits = 64
+      - myRank = {{integer}}
+      - numNodes = 3
+      - numGradientBits = 64
      - distributed reading is ENABLED
      - BufferedAsyncGradientAggregation is ENABLED
--- a/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk
+++ b/Tutorials/ImageHandsOn/ImageHandsOn_Task6.cntk
@ -98,7 +98,7 @@ TrainConvNet = {
            parallelizationMethod = "DataParallelSGD"
            parallelizationStartEpoch = 1
            distributedMBReading = true
-            dataParallelSGD = { gradientBits = 2 }
+            dataParallelSGD = { gradientBits = 2:1 }
        }
        AutoAdjust = {
            autoAdjustMinibatch = true        # enable automatic growing of minibatch size